SystemZISelLowering.cpp
1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
23#include "llvm/IR/GlobalAlias.h"
25#include "llvm/IR/Intrinsics.h"
26#include "llvm/IR/IntrinsicsS390.h"
30#include <cctype>
31#include <optional>
32
33using namespace llvm;
34
35#define DEBUG_TYPE "systemz-lower"
36
37// Temporarily let this be disabled by default until all known problems
38// related to argument extensions are fixed.
40 "argext-abi-check", cl::init(false),
41 cl::desc("Verify that narrow int args are properly extended per the "
42 "SystemZ ABI."));
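// (The option name above is its command-line spelling, so the check can be
// enabled explicitly with e.g. "llc -argext-abi-check".)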
43
44namespace {
45// Represents information about a comparison.
46struct Comparison {
47 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
48 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
49 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
50
51 // The operands to the comparison.
52 SDValue Op0, Op1;
53
54 // Chain if this is a strict floating-point comparison.
55 SDValue Chain;
56
57 // The opcode that should be used to compare Op0 and Op1.
58 unsigned Opcode;
59
60 // A SystemZICMP value. Only used for integer comparisons.
61 unsigned ICmpType;
62
63 // The mask of CC values that Opcode can produce.
64 unsigned CCValid;
65
66 // The mask of CC values for which the original condition is true.
67 unsigned CCMask;
68};
69} // end anonymous namespace
70
71// Classify VT as either 32 or 64 bit.
72static bool is32Bit(EVT VT) {
73 switch (VT.getSimpleVT().SimpleTy) {
74 case MVT::i32:
75 return true;
76 case MVT::i64:
77 return false;
78 default:
79 llvm_unreachable("Unsupported type");
80 }
81}
82
83// Return a version of MachineOperand that can be safely used before the
84// final use.
86 if (Op.isReg())
87 Op.setIsKill(false);
88 return Op;
89}
90
92 const SystemZSubtarget &STI)
93 : TargetLowering(TM), Subtarget(STI) {
94 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
95
96 auto *Regs = STI.getSpecialRegisters();
97
98 // Set up the register classes.
99 if (Subtarget.hasHighWord())
100 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
101 else
102 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
103 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
104 if (!useSoftFloat()) {
105 if (Subtarget.hasVector()) {
106 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
107 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
108 } else {
109 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
110 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
111 }
112 if (Subtarget.hasVectorEnhancements1())
113 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
114 else
115 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
116
117 if (Subtarget.hasVector()) {
118 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
119 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
120 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
121 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
122 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
123 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
124 }
125
126 if (Subtarget.hasVector())
127 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
128 }
129
130 // Compute derived properties from the register classes
132
133 // Set up special registers.
134 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
135
136 // TODO: It may be better to default to latency-oriented scheduling, however
137 // LLVM's current latency-oriented scheduler can't handle physreg definitions
138 // such as SystemZ has with CC, so set this to the register-pressure
139 // scheduler, because it can.
141
144
146
147 // Instructions are strings of 2-byte aligned 2-byte values.
149 // For performance reasons we prefer 16-byte alignment.
151
152 // Handle operations that are handled in a similar way for all types.
153 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
154 I <= MVT::LAST_FP_VALUETYPE;
155 ++I) {
157 if (isTypeLegal(VT)) {
158 // Lower SET_CC into an IPM-based sequence.
162
163 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
165
166 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
169 }
170 }
171
172 // Expand jump table branches as address arithmetic followed by an
173 // indirect jump.
175
176 // Expand BRCOND into a BR_CC (see above).
178
179 // Handle integer types except i128.
180 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
181 I <= MVT::LAST_INTEGER_VALUETYPE;
182 ++I) {
184 if (isTypeLegal(VT) && VT != MVT::i128) {
186
187 // Expand individual DIV and REMs into DIVREMs.
194
195 // Support addition/subtraction with overflow.
198
199 // Support addition/subtraction with carry.
202
203 // Support carry in as value rather than glue.
206
207 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
208 // available, or if the operand is constant.
210
211 // Use POPCNT on z196 and above.
212 if (Subtarget.hasPopulationCount())
214 else
216
217 // No special instructions for these.
220
221 // Use *MUL_LOHI where possible instead of MULH*.
226
227 // Only z196 and above have native support for conversions to unsigned.
228 // On z10, promoting to i64 doesn't generate an inexact condition for
229 // values that are outside the i32 range but in the i64 range, so use
230 // the default expansion.
231 if (!Subtarget.hasFPExtension())
233
234 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
235 // default to Expand, so need to be modified to Legal where appropriate.
237 if (Subtarget.hasFPExtension())
239
240 // And similarly for STRICT_[SU]INT_TO_FP.
242 if (Subtarget.hasFPExtension())
244 }
245 }
246
247 // Handle i128 if legal.
248 if (isTypeLegal(MVT::i128)) {
249 // No special instructions for these.
256
257 // No special instructions for these before arch15.
258 if (!Subtarget.hasVectorEnhancements3()) {
268 } else {
269 // Even if we do have a legal 128-bit multiply, we do not
270 // want 64-bit multiply-high operations to use it.
273 }
274
275 // Support addition/subtraction with carry.
280
281 // Use VPOPCT and add up partial results.
283
284 // Additional instructions available with arch15.
285 if (Subtarget.hasVectorEnhancements3()) {
286 setOperationAction(ISD::ABS, MVT::i128, Legal);
287 }
288
289 // We have to use libcalls for these.
298 }
299
300 // Type legalization will convert 8- and 16-bit atomic operations into
301 // forms that operate on i32s (but still keeping the original memory VT).
302 // Lower them into full i32 operations.
314
315 // Whether or not i128 is a legal type, we need to custom lower
316 // the atomic operations in order to exploit SystemZ instructions.
321
322 // Mark sign/zero extending atomic loads as legal, which will make
323 // DAGCombiner fold extensions into atomic loads if possible.
325 {MVT::i8, MVT::i16, MVT::i32}, Legal);
327 {MVT::i8, MVT::i16}, Legal);
329 MVT::i8, Legal);
330
331 // We can use the CC result of compare-and-swap to implement
332 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
336
338
339 // Traps are legal, as we will convert them to "j .+2".
340 setOperationAction(ISD::TRAP, MVT::Other, Legal);
341
342 // z10 has instructions for signed but not unsigned FP conversion.
343 // Handle unsigned 32-bit types as signed 64-bit types.
344 if (!Subtarget.hasFPExtension()) {
349 }
350
351 // We have native support for a 64-bit CTLZ, via FLOGR.
355
356 // On arch15 we have native support for a 64-bit CTTZ.
357 if (Subtarget.hasMiscellaneousExtensions4()) {
361 }
362
363 // On z15 we have native support for a 64-bit CTPOP.
364 if (Subtarget.hasMiscellaneousExtensions3()) {
367 }
368
369 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
371
372 // Expand 128 bit shifts without using a libcall.
376
377 // Also expand 256 bit shifts if i128 is a legal type.
378 if (isTypeLegal(MVT::i128)) {
382 }
383
384 // Handle bitcast from fp128 to i128.
385 if (!isTypeLegal(MVT::i128))
387
388 // We have native instructions for i8, i16 and i32 extensions, but not i1.
390 for (MVT VT : MVT::integer_valuetypes()) {
394 }
395
396 // Handle the various types of symbolic address.
402
403 // We need to handle dynamic allocations specially because of the
404 // 160-byte area at the bottom of the stack.
407
410
411 // Handle prefetches with PFD or PFDRL.
413
414 // Handle readcyclecounter with STCKF.
416
418 // Assume by default that all vector operations need to be expanded.
419 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
420 if (getOperationAction(Opcode, VT) == Legal)
421 setOperationAction(Opcode, VT, Expand);
422
423 // Likewise all truncating stores and extending loads.
424 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
425 setTruncStoreAction(VT, InnerVT, Expand);
428 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
429 }
430
431 if (isTypeLegal(VT)) {
432 // These operations are legal for anything that can be stored in a
433 // vector register, even if there is no native support for the format
434 // as such. In particular, we can do these for v4f32 even though there
435 // are no specific instructions for that format.
441
442 // Likewise, except that we need to replace the nodes with something
443 // more specific.
446 }
447 }
448
449 // Handle integer vector types.
451 if (isTypeLegal(VT)) {
452 // These operations have direct equivalents.
457 if (VT != MVT::v2i64 || Subtarget.hasVectorEnhancements3())
459 if (Subtarget.hasVectorEnhancements3() &&
460 VT != MVT::v16i8 && VT != MVT::v8i16) {
465 }
470 if (Subtarget.hasVectorEnhancements1())
472 else
476
477 // Convert a GPR scalar to a vector by inserting it into element 0.
479
480 // Use a series of unpacks for extensions.
483
484 // Detect shifts/rotates by a scalar amount and convert them into
485 // V*_BY_SCALAR.
490
491 // Add ISD::VECREDUCE_ADD as custom in order to implement
492 // it with VZERO+VSUM
494
495 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
496 // and inverting the result as necessary.
498 }
499 }
500
501 if (Subtarget.hasVector()) {
502 // There should be no need to check for float types other than v2f64
503 // since <2 x f32> isn't a legal type.
512
521 }
522
523 if (Subtarget.hasVectorEnhancements2()) {
532
541 }
542
543 // Handle floating-point types.
544 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
545 I <= MVT::LAST_FP_VALUETYPE;
546 ++I) {
548 if (isTypeLegal(VT)) {
549 // We can use FI for FRINT.
551
552 // We can use the extended form of FI for other rounding operations.
553 if (Subtarget.hasFPExtension()) {
559 }
560
561 // No special instructions for these.
567
568 // Special treatment.
570
571 // Handle constrained floating-point operations.
581 if (Subtarget.hasFPExtension()) {
587 }
588 }
589 }
590
591 // Handle floating-point vector types.
592 if (Subtarget.hasVector()) {
593 // Scalar-to-vector conversion is just a subreg.
596
597 // Some insertions and extractions can be done directly but others
598 // need to go via integers.
603
604 // These operations have direct equivalents.
605 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
606 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
607 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
608 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
609 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
610 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
611 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
612 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
613 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
616 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
619
620 // Handle constrained floating-point operations.
633
638 if (Subtarget.hasVectorEnhancements1()) {
641 }
642 }
643
644 // The vector enhancements facility 1 has instructions for these.
645 if (Subtarget.hasVectorEnhancements1()) {
646 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
647 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
648 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
649 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
650 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
651 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
652 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
653 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
654 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
657 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
660
665
670
675
680
685
686 // Handle constrained floating-point operations.
699 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
700 MVT::v4f32, MVT::v2f64 }) {
705 }
706 }
707
708 // We only have fused f128 multiply-addition on vector registers.
709 if (!Subtarget.hasVectorEnhancements1()) {
712 }
713
714 // We don't have a copysign instruction on vector registers.
715 if (Subtarget.hasVectorEnhancements1())
717
718 // Needed so that we don't try to implement f128 constant loads using
719 // a load-and-extend of an f80 constant (in cases where the constant
720 // would fit in an f80).
721 for (MVT VT : MVT::fp_valuetypes())
722 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
723
724 // We don't have extending load instruction on vector registers.
725 if (Subtarget.hasVectorEnhancements1()) {
726 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
727 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
728 }
729
730 // Floating-point truncation and stores need to be done separately.
731 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
732 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
733 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
734
735 // We have 64-bit FPR<->GPR moves, but need special handling for
736 // 32-bit forms.
737 if (!Subtarget.hasVector()) {
740 }
741
742 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
743 // structure, but VAEND is a no-op.
747
748 if (Subtarget.isTargetzOS()) {
749 // Handle address space casts between mixed sized pointers.
752 }
753
755
756 // Codes for which we want to perform some z-specific combinations.
760 ISD::LOAD,
771 ISD::SDIV,
772 ISD::UDIV,
773 ISD::SREM,
774 ISD::UREM,
777
778 // Handle intrinsics.
781
782 // We're not using SJLJ for exception handling, but the SJLJ nodes are
783 // implemented solely to support use of __builtin_setjmp / __builtin_longjmp.
786
787 // We want to use MVC in preference to even a single load/store pair.
788 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
790
791 // The main memset sequence is a byte store followed by an MVC.
792 // Two STC or MV..I stores win over that, but the kind of fused stores
793 // generated by target-independent code don't when the byte value is
794 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
795 // than "STC;MVC". Handle the choice in target-specific code instead.
796 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
798
799 // Default to having -disable-strictnode-mutation on
800 IsStrictFPEnabled = true;
801
802 if (Subtarget.isTargetzOS()) {
803 struct RTLibCallMapping {
804 RTLIB::Libcall Code;
805 const char *Name;
806 };
807 static RTLibCallMapping RTLibCallCommon[] = {
808#define HANDLE_LIBCALL(code, name) {RTLIB::code, name},
809#include "ZOSLibcallNames.def"
810 };
811 for (auto &E : RTLibCallCommon)
812 setLibcallName(E.Code, E.Name);
813 }
814}
815
817 return Subtarget.hasSoftFloat();
818}
819
821 LLVMContext &, EVT VT) const {
822 if (!VT.isVector())
823 return MVT::i32;
825}
826
828 const MachineFunction &MF, EVT VT) const {
829 if (useSoftFloat())
830 return false;
831
832 VT = VT.getScalarType();
833
834 if (!VT.isSimple())
835 return false;
836
837 switch (VT.getSimpleVT().SimpleTy) {
838 case MVT::f32:
839 case MVT::f64:
840 return true;
841 case MVT::f128:
842 return Subtarget.hasVectorEnhancements1();
843 default:
844 break;
845 }
846
847 return false;
848}
849
850// Return true if the constant can be generated with a vector instruction,
851// such as VGM, VGMB or VREPI.
853 const SystemZSubtarget &Subtarget) {
854 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
855 if (!Subtarget.hasVector() ||
856 (isFP128 && !Subtarget.hasVectorEnhancements1()))
857 return false;
858
859 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
860 // preferred way of creating all-zero and all-one vectors so give it
861 // priority over other methods below.
862 unsigned Mask = 0;
863 unsigned I = 0;
864 for (; I < SystemZ::VectorBytes; ++I) {
865 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
866 if (Byte == 0xff)
867 Mask |= 1ULL << I;
868 else if (Byte != 0)
869 break;
870 }
871 if (I == SystemZ::VectorBytes) {
873 OpVals.push_back(Mask);
875 return true;
876 }
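  // (For example, an all-ones splat sets every byte to 0xff above, so Mask
  // becomes 0xffff and a single VGBM materializes the constant.)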
877
878 if (SplatBitSize > 64)
879 return false;
880
881 auto tryValue = [&](uint64_t Value) -> bool {
882 // Try VECTOR REPLICATE IMMEDIATE
883 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
884 if (isInt<16>(SignedValue)) {
885 OpVals.push_back(((unsigned) SignedValue));
888 SystemZ::VectorBits / SplatBitSize);
889 return true;
890 }
891 // Try VECTOR GENERATE MASK
892 unsigned Start, End;
893 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
894 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
895 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
896 // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
897 OpVals.push_back(Start - (64 - SplatBitSize));
898 OpVals.push_back(End - (64 - SplatBitSize));
901 SystemZ::VectorBits / SplatBitSize);
902 return true;
903 }
904 return false;
905 };
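  // As a concrete example of the lambda above: a v8i16 splat of 5 has
  // SplatBitSize == 16 and a value that fits in a signed 16-bit immediate,
  // so it is handled by VECTOR REPLICATE IMMEDIATE with 8 halfword elements.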
906
907 // First try assuming that any undefined bits above the highest set bit
908 // and below the lowest set bit are 1s. This increases the likelihood of
909 // being able to use a sign-extended element value in VECTOR REPLICATE
910 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
911 uint64_t SplatBitsZ = SplatBits.getZExtValue();
912 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
913 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
914 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
915 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
916 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
917 if (tryValue(SplatBitsZ | Upper | Lower))
918 return true;
919
920 // Now try assuming that any undefined bits between the first and
921 // last defined set bits are set. This increases the chances of
922 // using a non-wraparound mask.
923 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
924 return tryValue(SplatBitsZ | Middle);
925}
926
928 if (IntImm.isSingleWord()) {
929 IntBits = APInt(128, IntImm.getZExtValue());
930 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
931 } else
932 IntBits = IntImm;
933 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
934
935 // Find the smallest splat.
936 SplatBits = IntImm;
937 unsigned Width = SplatBits.getBitWidth();
938 while (Width > 8) {
939 unsigned HalfSize = Width / 2;
940 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
941 APInt LowValue = SplatBits.trunc(HalfSize);
942
943 // If the two halves do not match, stop here.
944 if (HighValue != LowValue || 8 > HalfSize)
945 break;
946
947 SplatBits = HighValue;
948 Width = HalfSize;
949 }
950 SplatUndef = 0;
951 SplatBitSize = Width;
952}
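// For example, the 128-bit pattern 0x0005 repeated eight times halves
// repeatedly until the two halves differ, leaving SplatBits == 0x0005 and
// SplatBitSize == 16.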
953
955 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
956 bool HasAnyUndefs;
957
958 // Get IntBits by finding the 128 bit splat.
959 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
960 true);
961
962 // Get SplatBits by finding the 8 bit or greater splat.
963 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
964 true);
965}
966
968 bool ForCodeSize) const {
969 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
970 if (Imm.isZero() || Imm.isNegZero())
971 return true;
972
974}
975
978 MachineBasicBlock *MBB) const {
979 DebugLoc DL = MI.getDebugLoc();
980 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
981 const SystemZRegisterInfo *TRI = Subtarget.getRegisterInfo();
982
985
986 const BasicBlock *BB = MBB->getBasicBlock();
988
989 Register DstReg = MI.getOperand(0).getReg();
990 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
991 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
992 (void)TRI;
993 Register mainDstReg = MRI.createVirtualRegister(RC);
994 Register restoreDstReg = MRI.createVirtualRegister(RC);
995
996 MVT PVT = getPointerTy(MF->getDataLayout());
997 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
998 // For v = setjmp(buf), we generate the code below.
999 // Algorithm:
1000 //
1001 // ---------
1002 // | thisMBB |
1003 // ---------
1004 // |
1005 // ------------------------
1006 // | |
1007 // ---------- ---------------
1008 // | mainMBB | | restoreMBB |
1009 // | v = 0 | | v = 1 |
1010 // ---------- ---------------
1011 // | |
1012 // -------------------------
1013 // |
1014 // -----------------------------
1015 // | sinkMBB |
1016 // | phi(v_mainMBB,v_restoreMBB) |
1017 // -----------------------------
1018 // thisMBB:
1019 // buf[FPOffset] = Frame Pointer if hasFP.
1020 // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB.
1021 // buf[BCOffset] = Backchain value if building with -mbackchain.
1022 // buf[SPOffset] = Stack Pointer.
1023 // buf[LPOffset] = We never write this slot from LLVM; GCC's setjmp always stores R13 here.
1024 // SjLjSetup restoreMBB
1025 // mainMBB:
1026 // v_main = 0
1027 // sinkMBB:
1028 // v = phi(v_main, v_restore)
1029 // restoreMBB:
1030 // v_restore = 1
1031
1032 MachineBasicBlock *thisMBB = MBB;
1033 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
1034 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
1035 MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);
1036
1037 MF->insert(I, mainMBB);
1038 MF->insert(I, sinkMBB);
1039 MF->push_back(restoreMBB);
1040 restoreMBB->setMachineBlockAddressTaken();
1041
1043
1044 // Transfer the remainder of BB and its successor edges to sinkMBB.
1045 sinkMBB->splice(sinkMBB->begin(), MBB,
1046 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
1048
1049 // thisMBB:
1050 const int64_t FPOffset = 0; // Slot 1.
1051 const int64_t LabelOffset = 1 * PVT.getStoreSize(); // Slot 2.
1052 const int64_t BCOffset = 2 * PVT.getStoreSize(); // Slot 3.
1053 const int64_t SPOffset = 3 * PVT.getStoreSize(); // Slot 4.
1054
1055 // Buf address.
1056 Register BufReg = MI.getOperand(1).getReg();
1057
1058 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
1059 unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
1060
1061 // Prepare IP for longjmp.
1062 BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::LARL), LabelReg)
1063 .addMBB(restoreMBB);
1064 // Store IP for return from jmp, slot 2, offset = 1.
1065 BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::STG))
1066 .addReg(LabelReg)
1067 .addReg(BufReg)
1068 .addImm(LabelOffset)
1069 .addReg(0);
1070
1071 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1072 bool HasFP = Subtarget.getFrameLowering()->hasFP(*MF);
1073 if (HasFP) {
1074 BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::STG))
1075 .addReg(SpecialRegs->getFramePointerRegister())
1076 .addReg(BufReg)
1077 .addImm(FPOffset)
1078 .addReg(0);
1079 }
1080
1081 // Store SP.
1082 BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::STG))
1083 .addReg(SpecialRegs->getStackPointerRegister())
1084 .addReg(BufReg)
1085 .addImm(SPOffset)
1086 .addReg(0);
1087
1088 // Slot 3 (Offset = 2): Backchain value (if building with -mbackchain).
1089 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1090 if (BackChain) {
1091 Register BCReg = MRI.createVirtualRegister(PtrRC);
1092 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1093 MIB = BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1094 .addReg(SpecialRegs->getStackPointerRegister())
1095 .addImm(TFL->getBackchainOffset(*MF))
1096 .addReg(0);
1097
1098 BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::STG))
1099 .addReg(BCReg)
1100 .addReg(BufReg)
1101 .addImm(BCOffset)
1102 .addReg(0);
1103 }
1104
1105 // Setup.
1106 MIB = BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::EH_SjLj_Setup))
1107 .addMBB(restoreMBB);
1108
1109 const SystemZRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1110 MIB.addRegMask(RegInfo->getNoPreservedMask());
1111
1112 thisMBB->addSuccessor(mainMBB);
1113 thisMBB->addSuccessor(restoreMBB);
1114
1115 // mainMBB:
1116 BuildMI(mainMBB, DL, TII->get(SystemZ::LHI), mainDstReg).addImm(0);
1117 mainMBB->addSuccessor(sinkMBB);
1118
1119 // sinkMBB:
1120 BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(SystemZ::PHI), DstReg)
1121 .addReg(mainDstReg)
1122 .addMBB(mainMBB)
1123 .addReg(restoreDstReg)
1124 .addMBB(restoreMBB);
1125
1126 // restoreMBB.
1127 BuildMI(restoreMBB, DL, TII->get(SystemZ::LHI), restoreDstReg).addImm(1);
1128 BuildMI(restoreMBB, DL, TII->get(SystemZ::J)).addMBB(sinkMBB);
1129 restoreMBB->addSuccessor(sinkMBB);
1130
1131 MI.eraseFromParent();
1132
1133 return sinkMBB;
1134}
1135
1138 MachineBasicBlock *MBB) const {
1139
1140 DebugLoc DL = MI.getDebugLoc();
1141 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1142
1143 MachineFunction *MF = MBB->getParent();
1145
1146 MVT PVT = getPointerTy(MF->getDataLayout());
1147 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1148 Register BufReg = MI.getOperand(0).getReg();
1149 const TargetRegisterClass *RC = MRI.getRegClass(BufReg);
1150 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1151
1152 Register Tmp = MRI.createVirtualRegister(RC);
1153 Register BCReg = MRI.createVirtualRegister(RC);
1154
1156
1157 const int64_t FPOffset = 0;
1158 const int64_t LabelOffset = 1 * PVT.getStoreSize();
1159 const int64_t BCOffset = 2 * PVT.getStoreSize();
1160 const int64_t SPOffset = 3 * PVT.getStoreSize();
1161 const int64_t LPOffset = 4 * PVT.getStoreSize();
1162
1163 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), Tmp)
1164 .addReg(BufReg)
1165 .addImm(LabelOffset)
1166 .addReg(0);
1167
1168 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1169 SpecialRegs->getFramePointerRegister())
1170 .addReg(BufReg)
1171 .addImm(FPOffset)
1172 .addReg(0);
1173
1174 // We restore R13 even though LLVM's setjmp never stored it, because GCC
1175 // always stores R13 in __builtin_setjmp. We may be dealing with mixed
1176 // code: a GCC-compiled setjmp paired with an LLVM-compiled longjmp.
1177 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), SystemZ::R13D)
1178 .addReg(BufReg)
1179 .addImm(LPOffset)
1180 .addReg(0);
1181
1182 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1183 if (BackChain) {
1184 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1185 .addReg(BufReg)
1186 .addImm(BCOffset)
1187 .addReg(0);
1188 }
1189
1190 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1191 SpecialRegs->getStackPointerRegister())
1192 .addReg(BufReg)
1193 .addImm(SPOffset)
1194 .addReg(0);
1195
1196 if (BackChain) {
1197 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1198 BuildMI(*MBB, MI, DL, TII->get(SystemZ::STG))
1199 .addReg(BCReg)
1200 .addReg(SpecialRegs->getStackPointerRegister())
1201 .addImm(TFL->getBackchainOffset(*MF))
1202 .addReg(0);
1203 }
1204
1205 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BR)).addReg(Tmp);
1206
1207 MI.eraseFromParent();
1208 return MBB;
1209}
1210
1211/// Returns true if stack probing through inline assembly is requested.
1213 // If the function specifically requests inline stack probes, emit them.
1214 if (MF.getFunction().hasFnAttribute("probe-stack"))
1215 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
1216 "inline-asm";
1217 return false;
1218}
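// For example, a function carrying the IR attribute "probe-stack"="inline-asm"
// gets its stack probes emitted inline rather than through a libcall.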
1219
1223}
1224
1228}
1229
1232 // Don't expand subword operations as they require special treatment.
1233 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
1235
1236 // Don't expand if there is a target instruction available.
1237 if (Subtarget.hasInterlockedAccess1() &&
1238 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
1245
1247}
1248
1250 // We can use CGFI or CLGFI.
1251 return isInt<32>(Imm) || isUInt<32>(Imm);
1252}
1253
1255 // We can use ALGFI or SLGFI.
1256 return isUInt<32>(Imm) || isUInt<32>(-Imm);
1257}
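// For example, adding 0xffffffff maps to ALGFI, while adding -0xffffffff maps
// to SLGFI of 0xffffffff; both immediates pass the unsigned 32-bit check above.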
1258
1260 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
1261 // Unaligned accesses should never be slower than the expanded version.
1262 // We check specifically for aligned accesses in the few cases where
1263 // they are required.
1264 if (Fast)
1265 *Fast = 1;
1266 return true;
1267}
1268
1269// Information about the addressing mode for a memory access.
1271 // True if a long displacement is supported.
1273
1274 // True if use of index register is supported.
1276
1277 AddressingMode(bool LongDispl, bool IdxReg) :
1278 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
1279};
1280
1281// Return the desired addressing mode for a Load whose only use (in
1282// the same block) is a Store.
1284 Type *Ty) {
1285 // With vector support a Load->Store combination may be combined to either
1286 // an MVC or vector operations and it seems to work best to allow the
1287 // vector addressing mode.
1288 if (HasVector)
1289 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1290
1291 // Otherwise only the MVC case is special.
1292 bool MVC = Ty->isIntegerTy(8);
1293 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1294}
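// For example, an i8 load whose only use is a store is expected to become an
// MVC, which has no index register and only a 12-bit unsigned displacement,
// hence the restricted addressing mode returned above.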
1295
1296// Return the addressing mode which seems most desirable given an LLVM
1297// Instruction pointer.
1298static AddressingMode
1300 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1301 switch (II->getIntrinsicID()) {
1302 default: break;
1303 case Intrinsic::memset:
1304 case Intrinsic::memmove:
1305 case Intrinsic::memcpy:
1306 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1307 }
1308 }
1309
1310 if (isa<LoadInst>(I) && I->hasOneUse()) {
1311 auto *SingleUser = cast<Instruction>(*I->user_begin());
1312 if (SingleUser->getParent() == I->getParent()) {
1313 if (isa<ICmpInst>(SingleUser)) {
1314 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1315 if (C->getBitWidth() <= 64 &&
1316 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1317 // Comparison of memory with 16 bit signed / unsigned immediate
1318 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1319 } else if (isa<StoreInst>(SingleUser))
1320 // Load->Store
1321 return getLoadStoreAddrMode(HasVector, I->getType());
1322 }
1323 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1324 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1325 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1326 // Load->Store
1327 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1328 }
1329
1330 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1331
1332 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1333 // dependencies (LDE only supports small offsets).
1334 // * Utilize the vector registers to hold floating point
1335 // values (vector load / store instructions only support small
1336 // offsets).
1337
1338 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1339 I->getOperand(0)->getType());
1340 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1341 bool IsVectorAccess = MemAccessTy->isVectorTy();
1342
1343 // A store of an extracted vector element will be combined into a VSTE type
1344 // instruction.
1345 if (!IsVectorAccess && isa<StoreInst>(I)) {
1346 Value *DataOp = I->getOperand(0);
1347 if (isa<ExtractElementInst>(DataOp))
1348 IsVectorAccess = true;
1349 }
1350
1351 // A load which gets inserted into a vector element will be combined into a
1352 // VLE type instruction.
1353 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1354 User *LoadUser = *I->user_begin();
1355 if (isa<InsertElementInst>(LoadUser))
1356 IsVectorAccess = true;
1357 }
1358
1359 if (IsFPAccess || IsVectorAccess)
1360 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1361 }
1362
1363 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1364}
1365
1367 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1368 // Punt on globals for now, although they can be used in limited
1369 // RELATIVE LONG cases.
1370 if (AM.BaseGV)
1371 return false;
1372
1373 // Require a 20-bit signed offset.
1374 if (!isInt<20>(AM.BaseOffs))
1375 return false;
1376
1377 bool RequireD12 =
1378 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1379 AddressingMode SupportedAM(!RequireD12, true);
1380 if (I != nullptr)
1381 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1382
1383 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1384 return false;
1385
1386 if (!SupportedAM.IndexReg)
1387 // No indexing allowed.
1388 return AM.Scale == 0;
1389 else
1390 // Indexing is OK but no scale factor can be applied.
1391 return AM.Scale == 0 || AM.Scale == 1;
1392}
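// For example, when vector support is available, a vector or i128 access must
// use an unsigned 12-bit displacement (hence RequireD12 above), while other
// accesses may also use the signed 20-bit long-displacement forms.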
1393
1395 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
1396 unsigned SrcAS, const AttributeList &FuncAttributes) const {
1397 const int MVCFastLen = 16;
1398
1399 if (Limit != ~unsigned(0)) {
1400 // Don't expand Op into scalar loads/stores in these cases:
1401 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1402 return false; // Small memcpy: Use MVC
1403 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1404 return false; // Small memset (first byte with STC/MVI): Use MVC
1405 if (Op.isZeroMemset())
1406 return false; // Memset zero: Use XC
1407 }
1408
1409 return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
1410 SrcAS, FuncAttributes);
1411}
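// For example, a memcpy of at most MVCFastLen (16) bytes that allows overlap
// is left unexpanded above so that it can later be emitted as a single MVC.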
1412
1414 const AttributeList &FuncAttributes) const {
1415 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1416}
1417
1418bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1419 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1420 return false;
1421 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1422 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1423 return FromBits > ToBits;
1424}
1425
1427 if (!FromVT.isInteger() || !ToVT.isInteger())
1428 return false;
1429 unsigned FromBits = FromVT.getFixedSizeInBits();
1430 unsigned ToBits = ToVT.getFixedSizeInBits();
1431 return FromBits > ToBits;
1432}
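// Truncation of a wider integer to a narrower one is free on SystemZ because
// the narrower value is simply the low bits of the wider register; e.g. an
// i64 -> i32 truncation just reads the low 32-bit subregister.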
1433
1434//===----------------------------------------------------------------------===//
1435// Inline asm support
1436//===----------------------------------------------------------------------===//
1437
1440 if (Constraint.size() == 1) {
1441 switch (Constraint[0]) {
1442 case 'a': // Address register
1443 case 'd': // Data register (equivalent to 'r')
1444 case 'f': // Floating-point register
1445 case 'h': // High-part register
1446 case 'r': // General-purpose register
1447 case 'v': // Vector register
1448 return C_RegisterClass;
1449
1450 case 'Q': // Memory with base and unsigned 12-bit displacement
1451 case 'R': // Likewise, plus an index
1452 case 'S': // Memory with base and signed 20-bit displacement
1453 case 'T': // Likewise, plus an index
1454 case 'm': // Equivalent to 'T'.
1455 return C_Memory;
1456
1457 case 'I': // Unsigned 8-bit constant
1458 case 'J': // Unsigned 12-bit constant
1459 case 'K': // Signed 16-bit constant
1460 case 'L': // Signed 20-bit displacement (on all targets we support)
1461 case 'M': // 0x7fffffff
1462 return C_Immediate;
1463
1464 default:
1465 break;
1466 }
1467 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1468 switch (Constraint[1]) {
1469 case 'Q': // Address with base and unsigned 12-bit displacement
1470 case 'R': // Likewise, plus an index
1471 case 'S': // Address with base and signed 20-bit displacement
1472 case 'T': // Likewise, plus an index
1473 return C_Address;
1474
1475 default:
1476 break;
1477 }
1478 }
1479 return TargetLowering::getConstraintType(Constraint);
1480}
1481
1484 const char *constraint) const {
1486 Value *CallOperandVal = info.CallOperandVal;
1487 // If we don't have a value, we can't do a match,
1488 // but allow it at the lowest weight.
1489 if (!CallOperandVal)
1490 return CW_Default;
1491 Type *type = CallOperandVal->getType();
1492 // Look at the constraint type.
1493 switch (*constraint) {
1494 default:
1496 break;
1497
1498 case 'a': // Address register
1499 case 'd': // Data register (equivalent to 'r')
1500 case 'h': // High-part register
1501 case 'r': // General-purpose register
1502 weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1503 break;
1504
1505 case 'f': // Floating-point register
1506 if (!useSoftFloat())
1507 weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1508 break;
1509
1510 case 'v': // Vector register
1511 if (Subtarget.hasVector())
1512 weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1513 : CW_Default;
1514 break;
1515
1516 case 'I': // Unsigned 8-bit constant
1517 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1518 if (isUInt<8>(C->getZExtValue()))
1519 weight = CW_Constant;
1520 break;
1521
1522 case 'J': // Unsigned 12-bit constant
1523 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1524 if (isUInt<12>(C->getZExtValue()))
1525 weight = CW_Constant;
1526 break;
1527
1528 case 'K': // Signed 16-bit constant
1529 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1530 if (isInt<16>(C->getSExtValue()))
1531 weight = CW_Constant;
1532 break;
1533
1534 case 'L': // Signed 20-bit displacement (on all targets we support)
1535 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1536 if (isInt<20>(C->getSExtValue()))
1537 weight = CW_Constant;
1538 break;
1539
1540 case 'M': // 0x7fffffff
1541 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1542 if (C->getZExtValue() == 0x7fffffff)
1543 weight = CW_Constant;
1544 break;
1545 }
1546 return weight;
1547}
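// For example, with the 'K' constraint a call operand that is the constant 100
// fits in a signed 16-bit immediate and therefore receives CW_Constant weight.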
1548
1549// Parse a "{tNNN}" register constraint for which the register type "t"
1550// has already been verified. MC is the class associated with "t" and
1551// Map maps 0-based register numbers to LLVM register numbers.
1552static std::pair<unsigned, const TargetRegisterClass *>
1554 const unsigned *Map, unsigned Size) {
1555 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1556 if (isdigit(Constraint[2])) {
1557 unsigned Index;
1558 bool Failed =
1559 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1560 if (!Failed && Index < Size && Map[Index])
1561 return std::make_pair(Map[Index], RC);
1562 }
1563 return std::make_pair(0U, nullptr);
1564}
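// For example, given the constraint "{r5}" this parses register number 5 and,
// if Map[5] is defined, returns the corresponding LLVM register in class RC.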
1565
1566std::pair<unsigned, const TargetRegisterClass *>
1568 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1569 if (Constraint.size() == 1) {
1570 // GCC Constraint Letters
1571 switch (Constraint[0]) {
1572 default: break;
1573 case 'd': // Data register (equivalent to 'r')
1574 case 'r': // General-purpose register
1575 if (VT.getSizeInBits() == 64)
1576 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1577 else if (VT.getSizeInBits() == 128)
1578 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1579 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1580
1581 case 'a': // Address register
1582 if (VT == MVT::i64)
1583 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1584 else if (VT == MVT::i128)
1585 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1586 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1587
1588 case 'h': // High-part register (an LLVM extension)
1589 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1590
1591 case 'f': // Floating-point register
1592 if (!useSoftFloat()) {
1593 if (VT.getSizeInBits() == 64)
1594 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1595 else if (VT.getSizeInBits() == 128)
1596 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1597 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1598 }
1599 break;
1600
1601 case 'v': // Vector register
1602 if (Subtarget.hasVector()) {
1603 if (VT.getSizeInBits() == 32)
1604 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1605 if (VT.getSizeInBits() == 64)
1606 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1607 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1608 }
1609 break;
1610 }
1611 }
1612 if (Constraint.starts_with("{")) {
1613
1614 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1615 // to check the size on.
1616 auto getVTSizeInBits = [&VT]() {
1617 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1618 };
1619
1620 // We need to override the default register parsing for GPRs and FPRs
1621 // because the interpretation depends on VT. The internal names of
1622 // the registers are also different from the external names
1623 // (F0D and F0S instead of F0, etc.).
1624 if (Constraint[1] == 'r') {
1625 if (getVTSizeInBits() == 32)
1626 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1628 if (getVTSizeInBits() == 128)
1629 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1631 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1633 }
1634 if (Constraint[1] == 'f') {
1635 if (useSoftFloat())
1636 return std::make_pair(
1637 0u, static_cast<const TargetRegisterClass *>(nullptr));
1638 if (getVTSizeInBits() == 32)
1639 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1641 if (getVTSizeInBits() == 128)
1642 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1644 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1646 }
1647 if (Constraint[1] == 'v') {
1648 if (!Subtarget.hasVector())
1649 return std::make_pair(
1650 0u, static_cast<const TargetRegisterClass *>(nullptr));
1651 if (getVTSizeInBits() == 32)
1652 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1654 if (getVTSizeInBits() == 64)
1655 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1657 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1659 }
1660 }
1661 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1662}
1663
1664// FIXME? Maybe this could be a TableGen attribute on some registers and
1665// this table could be generated automatically from RegInfo.
1668 const MachineFunction &MF) const {
1669 Register Reg =
1671 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D
1672 : SystemZ::NoRegister)
1673 .Case("r15",
1674 Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister)
1675 .Default(SystemZ::NoRegister);
1676
1677 if (Reg)
1678 return Reg;
1679 report_fatal_error("Invalid register name global variable");
1680}
1681
1683 const Constant *PersonalityFn) const {
1684 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1685}
1686
1688 const Constant *PersonalityFn) const {
1689 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1690}
1691
1693 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1694 SelectionDAG &DAG) const {
1695 // Only support length 1 constraints for now.
1696 if (Constraint.size() == 1) {
1697 switch (Constraint[0]) {
1698 case 'I': // Unsigned 8-bit constant
1699 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1700 if (isUInt<8>(C->getZExtValue()))
1701 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1702 Op.getValueType()));
1703 return;
1704
1705 case 'J': // Unsigned 12-bit constant
1706 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1707 if (isUInt<12>(C->getZExtValue()))
1708 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1709 Op.getValueType()));
1710 return;
1711
1712 case 'K': // Signed 16-bit constant
1713 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1714 if (isInt<16>(C->getSExtValue()))
1715 Ops.push_back(DAG.getSignedTargetConstant(
1716 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1717 return;
1718
1719 case 'L': // Signed 20-bit displacement (on all targets we support)
1720 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1721 if (isInt<20>(C->getSExtValue()))
1722 Ops.push_back(DAG.getSignedTargetConstant(
1723 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1724 return;
1725
1726 case 'M': // 0x7fffffff
1727 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1728 if (C->getZExtValue() == 0x7fffffff)
1729 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1730 Op.getValueType()));
1731 return;
1732 }
1733 }
1734 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1735}
1736
1737//===----------------------------------------------------------------------===//
1738// Calling conventions
1739//===----------------------------------------------------------------------===//
1740
1741#include "SystemZGenCallingConv.inc"
1742
1744 CallingConv::ID) const {
1745 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1746 SystemZ::R14D, 0 };
1747 return ScratchRegs;
1748}
1749
1751 Type *ToType) const {
1752 return isTruncateFree(FromType, ToType);
1753}
1754
1756 return CI->isTailCall();
1757}
1758
1759// Value is a value that has been passed to us in the location described by VA
1760// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1761// any loads onto Chain.
1763 CCValAssign &VA, SDValue Chain,
1764 SDValue Value) {
1765 // If the argument has been promoted from a smaller type, insert an
1766 // assertion to capture this.
1767 if (VA.getLocInfo() == CCValAssign::SExt)
1769 DAG.getValueType(VA.getValVT()));
1770 else if (VA.getLocInfo() == CCValAssign::ZExt)
1772 DAG.getValueType(VA.getValVT()));
1773
1774 if (VA.isExtInLoc())
1775 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1776 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1777 // If this is a short vector argument loaded from the stack,
1778 // extend from i64 to full vector size and then bitcast.
1779 assert(VA.getLocVT() == MVT::i64);
1780 assert(VA.getValVT().isVector());
1781 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1782 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1783 } else
1784 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1785 return Value;
1786}
1787
1788// Value is a value of type VA.getValVT() that we need to copy into
1789// the location described by VA. Return a copy of Value converted to
1790// VA.getValVT(). The caller is responsible for handling indirect values.
1792 CCValAssign &VA, SDValue Value) {
1793 switch (VA.getLocInfo()) {
1794 case CCValAssign::SExt:
1795 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1796 case CCValAssign::ZExt:
1797 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1798 case CCValAssign::AExt:
1799 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1800 case CCValAssign::BCvt: {
1801 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1802 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1803 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1804 // For an f32 vararg we need to first promote it to an f64 and then
1805 // bitcast it to an i64.
1806 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1807 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1808 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1809 ? MVT::v2i64
1810 : VA.getLocVT();
1811 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1812 // For ELF, this is a short vector argument to be stored to the stack,
1813 // bitcast to v2i64 and then extract first element.
1814 if (BitCastToType == MVT::v2i64)
1815 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1816 DAG.getConstant(0, DL, MVT::i32));
1817 return Value;
1818 }
1819 case CCValAssign::Full:
1820 return Value;
1821 default:
1822 llvm_unreachable("Unhandled getLocInfo()");
1823 }
1824}
1825
1827 SDLoc DL(In);
1828 SDValue Lo, Hi;
1829 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1830 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1831 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1832 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1833 DAG.getConstant(64, DL, MVT::i32)));
1834 } else {
1835 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1836 }
1837
1838 // FIXME: If v2i64 were a legal type, we could use it instead of
1839 // Untyped here. This might enable improved folding.
1840 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1841 MVT::Untyped, Hi, Lo);
1842 return SDValue(Pair, 0);
1843}
1844
1846 SDLoc DL(In);
1847 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1848 DL, MVT::i64, In);
1849 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1850 DL, MVT::i64, In);
1851
1852 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1853 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1854 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1855 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1856 DAG.getConstant(64, DL, MVT::i32));
1857 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1858 } else {
1859 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1860 }
1861}
1862
1864 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1865 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1866 EVT ValueVT = Val.getValueType();
1867 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1868 // Inline assembly operand.
1869 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1870 return true;
1871 }
1872
1873 return false;
1874}
1875
1877 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1878 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1879 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1880 // Inline assembly operand.
1881 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1882 return DAG.getBitcast(ValueVT, Res);
1883 }
1884
1885 return SDValue();
1886}
1887
1889 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1890 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1891 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1893 MachineFrameInfo &MFI = MF.getFrameInfo();
1895 SystemZMachineFunctionInfo *FuncInfo =
1897 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1898 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1899
1900 // Assign locations to all of the incoming arguments.
1902 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1903 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1904 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
1905
1906 unsigned NumFixedGPRs = 0;
1907 unsigned NumFixedFPRs = 0;
1908 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1909 SDValue ArgValue;
1910 CCValAssign &VA = ArgLocs[I];
1911 EVT LocVT = VA.getLocVT();
1912 if (VA.isRegLoc()) {
1913 // Arguments passed in registers
1914 const TargetRegisterClass *RC;
1915 switch (LocVT.getSimpleVT().SimpleTy) {
1916 default:
1917 // Integers smaller than i64 should be promoted to i64.
1918 llvm_unreachable("Unexpected argument type");
1919 case MVT::i32:
1920 NumFixedGPRs += 1;
1921 RC = &SystemZ::GR32BitRegClass;
1922 break;
1923 case MVT::i64:
1924 NumFixedGPRs += 1;
1925 RC = &SystemZ::GR64BitRegClass;
1926 break;
1927 case MVT::f32:
1928 NumFixedFPRs += 1;
1929 RC = &SystemZ::FP32BitRegClass;
1930 break;
1931 case MVT::f64:
1932 NumFixedFPRs += 1;
1933 RC = &SystemZ::FP64BitRegClass;
1934 break;
1935 case MVT::f128:
1936 NumFixedFPRs += 2;
1937 RC = &SystemZ::FP128BitRegClass;
1938 break;
1939 case MVT::v16i8:
1940 case MVT::v8i16:
1941 case MVT::v4i32:
1942 case MVT::v2i64:
1943 case MVT::v4f32:
1944 case MVT::v2f64:
1945 RC = &SystemZ::VR128BitRegClass;
1946 break;
1947 }
1948
1949 Register VReg = MRI.createVirtualRegister(RC);
1950 MRI.addLiveIn(VA.getLocReg(), VReg);
1951 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1952 } else {
1953 assert(VA.isMemLoc() && "Argument not register or memory");
1954
1955 // Create the frame index object for this incoming parameter.
1956 // FIXME: Pre-include call frame size in the offset, should not
1957 // need to manually add it here.
1958 int64_t ArgSPOffset = VA.getLocMemOffset();
1959 if (Subtarget.isTargetXPLINK64()) {
1960 auto &XPRegs =
1962 ArgSPOffset += XPRegs.getCallFrameSize();
1963 }
1964 int FI =
1965 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1966
1967 // Create the SelectionDAG nodes corresponding to a load
1968 // from this parameter. Unpromoted ints and floats are
1969 // passed as right-justified 8-byte values.
1970 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1971 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1972 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1973 DAG.getIntPtrConstant(4, DL));
1974 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1976 }
1977
1978 // Convert the value of the argument register into the value that's
1979 // being passed.
1980 if (VA.getLocInfo() == CCValAssign::Indirect) {
1981 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1983 // If the original argument was split (e.g. i128), we need
1984 // to load all parts of it here (using the same address).
1985 unsigned ArgIndex = Ins[I].OrigArgIndex;
1986 assert (Ins[I].PartOffset == 0);
1987 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1988 CCValAssign &PartVA = ArgLocs[I + 1];
1989 unsigned PartOffset = Ins[I + 1].PartOffset;
1990 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1991 DAG.getIntPtrConstant(PartOffset, DL));
1992 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1994 ++I;
1995 }
1996 } else
1997 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1998 }
1999
2000 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
2001 // Save the number of non-varargs registers for later use by va_start, etc.
2002 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2003 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2004
2005 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2006 Subtarget.getSpecialRegisters());
2007
2008 // Likewise the address (in the form of a frame index) of where the
2009 // first stack vararg would be. The 1-byte size here is arbitrary.
2010 // FIXME: Pre-include call frame size in the offset, should not
2011 // need to manually add it here.
2012 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
2013 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
2014 FuncInfo->setVarArgsFrameIndex(FI);
2015 }
2016
2017 if (IsVarArg && Subtarget.isTargetELF()) {
2018 // Save the number of non-varargs registers for later use by va_start, etc.
2019 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2020 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2021
2022 // Likewise the address (in the form of a frame index) of where the
2023 // first stack vararg would be. The 1-byte size here is arbitrary.
2024 int64_t VarArgsOffset = CCInfo.getStackSize();
2025 FuncInfo->setVarArgsFrameIndex(
2026 MFI.CreateFixedObject(1, VarArgsOffset, true));
2027
2028 // ...and a similar frame index for the caller-allocated save area
2029 // that will be used to store the incoming registers.
2030 int64_t RegSaveOffset =
2031 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
2032 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
2033 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
2034
2035 // Store the FPR varargs in the reserved frame slots. (We store the
2036 // GPRs as part of the prologue.)
2037 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
2039 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
2040 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
2041 int FI =
2043 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2045 &SystemZ::FP64BitRegClass);
2046 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
2047 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
2049 }
2050 // Join the stores, which are independent of one another.
2051 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2052 ArrayRef(&MemOps[NumFixedFPRs],
2053 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
2054 }
2055 }
2056
2057 if (Subtarget.isTargetXPLINK64()) {
2058 // Create a virtual register for handling the incoming "ADA" special register (R5).
2059 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
2060 Register ADAvReg = MRI.createVirtualRegister(RC);
2061 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2062 Subtarget.getSpecialRegisters());
2063 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
2064 FuncInfo->setADAVirtualRegister(ADAvReg);
2065 }
2066 return Chain;
2067}
2068
2069static bool canUseSiblingCall(const CCState &ArgCCInfo,
2072 // Punt if there are any indirect or stack arguments, or if the call
2073 // needs the callee-saved argument register R6, or if the call uses
2074 // the callee-saved register arguments SwiftSelf and SwiftError.
2075 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2076 CCValAssign &VA = ArgLocs[I];
2078 return false;
2079 if (!VA.isRegLoc())
2080 return false;
2081 Register Reg = VA.getLocReg();
2082 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
2083 return false;
2084 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
2085 return false;
2086 }
2087 return true;
2088}
2089
2091 unsigned Offset, bool LoadAdr = false) {
2094 unsigned ADAvReg = MFI->getADAVirtualRegister();
2096
2097 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
2098 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
2099
2100 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
2101 if (!LoadAdr)
2102 Result = DAG.getLoad(
2103 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
2105
2106 return Result;
2107}
2108
2109// ADA access using a GlobalValue.
2110// Note: for functions, the address of the descriptor is returned.
2112 EVT PtrVT) {
2113 unsigned ADAtype;
2114 bool LoadAddr = false;
2115 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
2116 bool IsFunction =
2117 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
2118 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
2119
2120 if (IsFunction) {
2121 if (IsInternal) {
2123 LoadAddr = true;
2124 } else
2126 } else {
2128 }
2129 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
2130
2131 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
2132}
2133
2134static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
2135 SDLoc &DL, SDValue &Chain) {
2136 unsigned ADADelta = 0; // ADA offset in desc.
2137 unsigned EPADelta = 8; // EPA offset in desc.
2140
2141 // XPLink calling convention.
2142 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2143 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
2144 G->getGlobal()->hasPrivateLinkage());
2145 if (IsInternal) {
2148 unsigned ADAvReg = MFI->getADAVirtualRegister();
2149 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
2150 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2151 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2152 return true;
2153 } else {
2155 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2156 ADA = getADAEntry(DAG, GA, DL, ADADelta);
2157 Callee = getADAEntry(DAG, GA, DL, EPADelta);
2158 }
2159 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2161 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2162 ADA = getADAEntry(DAG, ES, DL, ADADelta);
2163 Callee = getADAEntry(DAG, ES, DL, EPADelta);
2164 } else {
2165 // Function pointer case
2166 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2167 DAG.getConstant(ADADelta, DL, PtrVT));
2168 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
2170 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2171 DAG.getConstant(EPADelta, DL, PtrVT));
2172 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
2174 }
2175 return false;
2176}
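// In summary, the boolean result indicates whether the call can be emitted as
// a direct (BRASL-style) call: that is only the case for internally visible
// functions reached via their PC-relative address. All other callees go
// through the XPLINK function descriptor, from which the ADA is loaded at
// offset 0 and the entry-point address at offset 8.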
2177
2178SDValue
2180 SmallVectorImpl<SDValue> &InVals) const {
2181 SelectionDAG &DAG = CLI.DAG;
2182 SDLoc &DL = CLI.DL;
2184 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2186 SDValue Chain = CLI.Chain;
2187 SDValue Callee = CLI.Callee;
2188 bool &IsTailCall = CLI.IsTailCall;
2189 CallingConv::ID CallConv = CLI.CallConv;
2190 bool IsVarArg = CLI.IsVarArg;
2192 EVT PtrVT = getPointerTy(MF.getDataLayout());
2193 LLVMContext &Ctx = *DAG.getContext();
2195
2196 // FIXME: z/OS support to be added later.
2197 if (Subtarget.isTargetXPLINK64())
2198 IsTailCall = false;
2199
2200 // Integer args <=32 bits should have an extension attribute.
2201 verifyNarrowIntegerArgs_Call(Outs, &MF.getFunction(), Callee);
2202
2203 // Analyze the operands of the call, assigning locations to each operand.
2205 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
2206 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
2207
2208 // We don't support GuaranteedTailCallOpt, only automatically-detected
2209 // sibling calls.
2210 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
2211 IsTailCall = false;
2212
2213 // Get a count of how many bytes are to be pushed on the stack.
2214 unsigned NumBytes = ArgCCInfo.getStackSize();
2215
2216 // Mark the start of the call.
2217 if (!IsTailCall)
2218 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
2219
2220 // Copy argument values to their designated locations.
2222 SmallVector<SDValue, 8> MemOpChains;
2223 SDValue StackPtr;
2224 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2225 CCValAssign &VA = ArgLocs[I];
2226 SDValue ArgValue = OutVals[I];
2227
2228 if (VA.getLocInfo() == CCValAssign::Indirect) {
2229 // Store the argument in a stack slot and pass its address.
2230 unsigned ArgIndex = Outs[I].OrigArgIndex;
2231 EVT SlotVT;
2232 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
2233 // Allocate the full stack space for a promoted (and split) argument.
2234 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
2235 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
2236 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
2237 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
2238 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
2239 } else {
2240 SlotVT = Outs[I].VT;
2241 }
2242 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
2243 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2244 MemOpChains.push_back(
2245 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
2247 // If the original argument was split (e.g. i128), we need
2248 // to store all parts of it here (and pass just one address).
2249 assert (Outs[I].PartOffset == 0);
2250 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
2251 SDValue PartValue = OutVals[I + 1];
2252 unsigned PartOffset = Outs[I + 1].PartOffset;
2253 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
2254 DAG.getIntPtrConstant(PartOffset, DL));
2255 MemOpChains.push_back(
2256 DAG.getStore(Chain, DL, PartValue, Address,
2258 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
2259 SlotVT.getStoreSize()) && "Not enough space for argument part!");
2260 ++I;
2261 }
2262 ArgValue = SpillSlot;
2263 } else
2264 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
2265
2266 if (VA.isRegLoc()) {
2267 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
2268 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
2269 // and low values.
2270 if (VA.getLocVT() == MVT::i128)
2271 ArgValue = lowerI128ToGR128(DAG, ArgValue);
2272 // Queue up the argument copies and emit them at the end.
2273 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2274 } else {
2275 assert(VA.isMemLoc() && "Argument not register or memory");
2276
2277 // Work out the address of the stack slot. Unpromoted ints and
2278 // floats are passed as right-justified 8-byte values.
2279 if (!StackPtr.getNode())
2280 StackPtr = DAG.getCopyFromReg(Chain, DL,
2281 Regs->getStackPointerRegister(), PtrVT);
2282 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
2283 VA.getLocMemOffset();
2284 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
2285 Offset += 4;
2286 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2288
2289 // Emit the store.
2290 MemOpChains.push_back(
2291 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2292
2293 // Although long doubles and vectors are passed on the stack when they
2294 // are vararg (non-fixed) arguments, if a long double or vector
2295 // occupies the third and fourth slots of the argument list, GPR3 should
2296 // still shadow the third slot of the argument list.
2297 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2298 SDValue ShadowArgValue =
2299 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2300 DAG.getIntPtrConstant(1, DL));
2301 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2302 }
2303 }
2304 }
2305
2306 // Join the stores, which are independent of one another.
2307 if (!MemOpChains.empty())
2308 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2309
2310 // Accept direct calls by converting symbolic call addresses to the
2311 // associated Target* opcodes. Force %r1 to be used for indirect
2312 // tail calls.
2313 SDValue Glue;
2314
2315 if (Subtarget.isTargetXPLINK64()) {
2316 SDValue ADA;
2317 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2318 if (!IsBRASL) {
2319 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2320 ->getAddressOfCalleeRegister();
2321 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2322 Glue = Chain.getValue(1);
2323 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2324 }
2325 RegsToPass.push_back(std::make_pair(
2326 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2327 } else {
2328 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2329 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2330 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2331 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2332 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2333 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2334 } else if (IsTailCall) {
2335 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2336 Glue = Chain.getValue(1);
2337 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2338 }
2339 }
2340
2341 // Build a sequence of copy-to-reg nodes, chained and glued together.
2342 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
2343 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
2344 RegsToPass[I].second, Glue);
2345 Glue = Chain.getValue(1);
2346 }
2347
2348 // The first call operand is the chain and the second is the target address.
2350 Ops.push_back(Chain);
2351 Ops.push_back(Callee);
2352
2353 // Add argument registers to the end of the list so that they are
2354 // known live into the call.
2355 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
2356 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
2357 RegsToPass[I].second.getValueType()));
2358
2359 // Add a register mask operand representing the call-preserved registers.
2360 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2361 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2362 assert(Mask && "Missing call preserved mask for calling convention");
2363 Ops.push_back(DAG.getRegisterMask(Mask));
2364
2365 // Glue the call to the argument copies, if any.
2366 if (Glue.getNode())
2367 Ops.push_back(Glue);
2368
2369 // Emit the call.
2370 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2371 if (IsTailCall) {
2372 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2373 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2374 return Ret;
2375 }
2376 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2377 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2378 Glue = Chain.getValue(1);
2379
2380 // Mark the end of the call, which is glued to the call itself.
2381 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2382 Glue = Chain.getValue(1);
2383
2384 // Assign locations to each value returned by this call.
2386 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2387 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2388
2389 // Copy all of the result registers out of their specified physreg.
2390 for (CCValAssign &VA : RetLocs) {
2391 // Copy the value out, gluing the copy to the end of the call sequence.
2392 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2393 VA.getLocVT(), Glue);
2394 Chain = RetValue.getValue(1);
2395 Glue = RetValue.getValue(2);
2396
2397 // Convert the value of the return register into the value that's
2398 // being returned.
2399 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2400 }
2401
2402 return Chain;
2403}
2404
2405// Generate a call taking the given operands as arguments and returning a
2406// result of type RetVT.
2408 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2409 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2410 bool DoesNotReturn, bool IsReturnValueUsed) const {
2412 Args.reserve(Ops.size());
2413
2415 for (SDValue Op : Ops) {
2416 Entry.Node = Op;
2417 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
2418 Entry.IsSExt = shouldSignExtendTypeInLibCall(Entry.Ty, IsSigned);
2419 Entry.IsZExt = !Entry.IsSExt;
2420 Args.push_back(Entry);
2421 }
2422
2423 SDValue Callee =
2424 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2425
2426 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2428 bool SignExtend = shouldSignExtendTypeInLibCall(RetTy, IsSigned);
2429 CLI.setDebugLoc(DL)
2430 .setChain(Chain)
2431 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2432 .setNoReturn(DoesNotReturn)
2433 .setDiscardResult(!IsReturnValueUsed)
2434 .setSExtResult(SignExtend)
2435 .setZExtResult(!SignExtend);
2436 return LowerCallTo(CLI);
2437}
2438
2441 MachineFunction &MF, bool isVarArg,
2443 LLVMContext &Context,
2444 const Type *RetTy) const {
2445 // Special case that we cannot easily detect in RetCC_SystemZ since
2446 // i128 may not be a legal type.
2447 for (auto &Out : Outs)
2448 if (Out.ArgVT == MVT::i128)
2449 return false;
2450
2452 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
2453 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2454}
2455
2456SDValue
2458 bool IsVarArg,
2460 const SmallVectorImpl<SDValue> &OutVals,
2461 const SDLoc &DL, SelectionDAG &DAG) const {
2463
2464 // Integer args <=32 bits should have an extension attribute.
2465 verifyNarrowIntegerArgs_Ret(Outs, &MF.getFunction());
2466
2467 // Assign locations to each returned value.
2469 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2470 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2471
2472 // Quick exit for void returns
2473 if (RetLocs.empty())
2474 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2475
2476 if (CallConv == CallingConv::GHC)
2477 report_fatal_error("GHC functions return void only");
2478
2479 // Copy the result values into the output registers.
2480 SDValue Glue;
2482 RetOps.push_back(Chain);
2483 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2484 CCValAssign &VA = RetLocs[I];
2485 SDValue RetValue = OutVals[I];
2486
2487 // Make the return register live on exit.
2488 assert(VA.isRegLoc() && "Can only return in registers!");
2489
2490 // Promote the value as required.
2491 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2492
2493 // Chain and glue the copies together.
2494 Register Reg = VA.getLocReg();
2495 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2496 Glue = Chain.getValue(1);
2497 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2498 }
2499
2500 // Update chain and glue.
2501 RetOps[0] = Chain;
2502 if (Glue.getNode())
2503 RetOps.push_back(Glue);
2504
2505 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2506}
2507
2508// Return true if Op is an intrinsic node with chain that returns the CC value
2509// as its only (other) argument. Provide the associated SystemZISD opcode and
2510// the mask of valid CC values if so.
2511static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2512 unsigned &CCValid) {
2513 unsigned Id = Op.getConstantOperandVal(1);
2514 switch (Id) {
2515 case Intrinsic::s390_tbegin:
2516 Opcode = SystemZISD::TBEGIN;
2517 CCValid = SystemZ::CCMASK_TBEGIN;
2518 return true;
2519
2520 case Intrinsic::s390_tbegin_nofloat:
2522 CCValid = SystemZ::CCMASK_TBEGIN;
2523 return true;
2524
2525 case Intrinsic::s390_tend:
2526 Opcode = SystemZISD::TEND;
2527 CCValid = SystemZ::CCMASK_TEND;
2528 return true;
2529
2530 default:
2531 return false;
2532 }
2533}
2534
2535// Return true if Op is an intrinsic node without chain that returns the
2536// CC value as its final argument. Provide the associated SystemZISD
2537// opcode and the mask of valid CC values if so.
2538static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2539 unsigned Id = Op.getConstantOperandVal(0);
2540 switch (Id) {
2541 case Intrinsic::s390_vpkshs:
2542 case Intrinsic::s390_vpksfs:
2543 case Intrinsic::s390_vpksgs:
2544 Opcode = SystemZISD::PACKS_CC;
2545 CCValid = SystemZ::CCMASK_VCMP;
2546 return true;
2547
2548 case Intrinsic::s390_vpklshs:
2549 case Intrinsic::s390_vpklsfs:
2550 case Intrinsic::s390_vpklsgs:
2551 Opcode = SystemZISD::PACKLS_CC;
2552 CCValid = SystemZ::CCMASK_VCMP;
2553 return true;
2554
2555 case Intrinsic::s390_vceqbs:
2556 case Intrinsic::s390_vceqhs:
2557 case Intrinsic::s390_vceqfs:
2558 case Intrinsic::s390_vceqgs:
2559 case Intrinsic::s390_vceqqs:
2560 Opcode = SystemZISD::VICMPES;
2561 CCValid = SystemZ::CCMASK_VCMP;
2562 return true;
2563
2564 case Intrinsic::s390_vchbs:
2565 case Intrinsic::s390_vchhs:
2566 case Intrinsic::s390_vchfs:
2567 case Intrinsic::s390_vchgs:
2568 case Intrinsic::s390_vchqs:
2569 Opcode = SystemZISD::VICMPHS;
2570 CCValid = SystemZ::CCMASK_VCMP;
2571 return true;
2572
2573 case Intrinsic::s390_vchlbs:
2574 case Intrinsic::s390_vchlhs:
2575 case Intrinsic::s390_vchlfs:
2576 case Intrinsic::s390_vchlgs:
2577 case Intrinsic::s390_vchlqs:
2578 Opcode = SystemZISD::VICMPHLS;
2579 CCValid = SystemZ::CCMASK_VCMP;
2580 return true;
2581
2582 case Intrinsic::s390_vtm:
2583 Opcode = SystemZISD::VTM;
2584 CCValid = SystemZ::CCMASK_VCMP;
2585 return true;
2586
2587 case Intrinsic::s390_vfaebs:
2588 case Intrinsic::s390_vfaehs:
2589 case Intrinsic::s390_vfaefs:
2590 Opcode = SystemZISD::VFAE_CC;
2591 CCValid = SystemZ::CCMASK_ANY;
2592 return true;
2593
2594 case Intrinsic::s390_vfaezbs:
2595 case Intrinsic::s390_vfaezhs:
2596 case Intrinsic::s390_vfaezfs:
2597 Opcode = SystemZISD::VFAEZ_CC;
2598 CCValid = SystemZ::CCMASK_ANY;
2599 return true;
2600
2601 case Intrinsic::s390_vfeebs:
2602 case Intrinsic::s390_vfeehs:
2603 case Intrinsic::s390_vfeefs:
2604 Opcode = SystemZISD::VFEE_CC;
2605 CCValid = SystemZ::CCMASK_ANY;
2606 return true;
2607
2608 case Intrinsic::s390_vfeezbs:
2609 case Intrinsic::s390_vfeezhs:
2610 case Intrinsic::s390_vfeezfs:
2611 Opcode = SystemZISD::VFEEZ_CC;
2612 CCValid = SystemZ::CCMASK_ANY;
2613 return true;
2614
2615 case Intrinsic::s390_vfenebs:
2616 case Intrinsic::s390_vfenehs:
2617 case Intrinsic::s390_vfenefs:
2618 Opcode = SystemZISD::VFENE_CC;
2619 CCValid = SystemZ::CCMASK_ANY;
2620 return true;
2621
2622 case Intrinsic::s390_vfenezbs:
2623 case Intrinsic::s390_vfenezhs:
2624 case Intrinsic::s390_vfenezfs:
2625 Opcode = SystemZISD::VFENEZ_CC;
2626 CCValid = SystemZ::CCMASK_ANY;
2627 return true;
2628
2629 case Intrinsic::s390_vistrbs:
2630 case Intrinsic::s390_vistrhs:
2631 case Intrinsic::s390_vistrfs:
2632 Opcode = SystemZISD::VISTR_CC;
2634 return true;
2635
2636 case Intrinsic::s390_vstrcbs:
2637 case Intrinsic::s390_vstrchs:
2638 case Intrinsic::s390_vstrcfs:
2639 Opcode = SystemZISD::VSTRC_CC;
2640 CCValid = SystemZ::CCMASK_ANY;
2641 return true;
2642
2643 case Intrinsic::s390_vstrczbs:
2644 case Intrinsic::s390_vstrczhs:
2645 case Intrinsic::s390_vstrczfs:
2646 Opcode = SystemZISD::VSTRCZ_CC;
2647 CCValid = SystemZ::CCMASK_ANY;
2648 return true;
2649
2650 case Intrinsic::s390_vstrsb:
2651 case Intrinsic::s390_vstrsh:
2652 case Intrinsic::s390_vstrsf:
2653 Opcode = SystemZISD::VSTRS_CC;
2654 CCValid = SystemZ::CCMASK_ANY;
2655 return true;
2656
2657 case Intrinsic::s390_vstrszb:
2658 case Intrinsic::s390_vstrszh:
2659 case Intrinsic::s390_vstrszf:
2660 Opcode = SystemZISD::VSTRSZ_CC;
2661 CCValid = SystemZ::CCMASK_ANY;
2662 return true;
2663
2664 case Intrinsic::s390_vfcedbs:
2665 case Intrinsic::s390_vfcesbs:
2666 Opcode = SystemZISD::VFCMPES;
2667 CCValid = SystemZ::CCMASK_VCMP;
2668 return true;
2669
2670 case Intrinsic::s390_vfchdbs:
2671 case Intrinsic::s390_vfchsbs:
2672 Opcode = SystemZISD::VFCMPHS;
2673 CCValid = SystemZ::CCMASK_VCMP;
2674 return true;
2675
2676 case Intrinsic::s390_vfchedbs:
2677 case Intrinsic::s390_vfchesbs:
2678 Opcode = SystemZISD::VFCMPHES;
2679 CCValid = SystemZ::CCMASK_VCMP;
2680 return true;
2681
2682 case Intrinsic::s390_vftcidb:
2683 case Intrinsic::s390_vftcisb:
2684 Opcode = SystemZISD::VFTCI;
2685 CCValid = SystemZ::CCMASK_VCMP;
2686 return true;
2687
2688 case Intrinsic::s390_tdc:
2689 Opcode = SystemZISD::TDC;
2690 CCValid = SystemZ::CCMASK_TDC;
2691 return true;
2692
2693 default:
2694 return false;
2695 }
2696}
2697
2698// Emit an intrinsic with chain and an explicit CC register result.
2700 unsigned Opcode) {
2701 // Copy all operands except the intrinsic ID.
2702 unsigned NumOps = Op.getNumOperands();
2704 Ops.reserve(NumOps - 1);
2705 Ops.push_back(Op.getOperand(0));
2706 for (unsigned I = 2; I < NumOps; ++I)
2707 Ops.push_back(Op.getOperand(I));
2708
2709 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2710 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2711 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2712 SDValue OldChain = SDValue(Op.getNode(), 1);
2713 SDValue NewChain = SDValue(Intr.getNode(), 1);
2714 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2715 return Intr.getNode();
2716}
2717
2718// Emit an intrinsic with an explicit CC register result.
2720 unsigned Opcode) {
2721 // Copy all operands except the intrinsic ID.
2722 unsigned NumOps = Op.getNumOperands();
2724 Ops.reserve(NumOps - 1);
2725 for (unsigned I = 1; I < NumOps; ++I)
2726 Ops.push_back(Op.getOperand(I));
2727
2728 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2729 return Intr.getNode();
2730}
2731
2732// CC is a comparison that will be implemented using an integer or
2733// floating-point comparison. Return the condition code mask for
2734// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2735// unsigned comparisons and clear for signed ones. In the floating-point
2736// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2738#define CONV(X) \
2739 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2740 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2741 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2742
2743 switch (CC) {
2744 default:
2745 llvm_unreachable("Invalid integer condition!");
2746
2747 CONV(EQ);
2748 CONV(NE);
2749 CONV(GT);
2750 CONV(GE);
2751 CONV(LT);
2752 CONV(LE);
2753
2754 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2756 }
2757#undef CONV
2758}
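// For example, ISD::SETGT maps to CCMASK_CMP_GT, while ISD::SETUGT maps to
// CCMASK_CMP_UO | CCMASK_CMP_GT; for integer comparisons getCmp() uses the
// CCMASK_CMP_UO bit only to choose an unsigned comparison type and then
// clears it again.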
2759
2760// If C can be converted to a comparison against zero, adjust the operands
2761// as necessary.
2762static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2763 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2764 return;
2765
2766 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2767 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2768 return;
2769
2770 int64_t Value = ConstOp1->getSExtValue();
2771 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2772 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2773 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2774 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2775 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2776 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2777 }
2778}
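// For example, a signed "x > -1" becomes "x >= 0": toggling the EQ bit turns
// CCMASK_CMP_GT into CCMASK_CMP_GE, and the constant operand is replaced by
// zero so that a comparison against zero can be used.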
2779
2780// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2781// adjust the operands as necessary.
2782static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2783 Comparison &C) {
2784 // For us to make any changes, it must be a comparison between a single-use
2785 // load and a constant.
2786 if (!C.Op0.hasOneUse() ||
2787 C.Op0.getOpcode() != ISD::LOAD ||
2788 C.Op1.getOpcode() != ISD::Constant)
2789 return;
2790
2791 // We must have an 8- or 16-bit load.
2792 auto *Load = cast<LoadSDNode>(C.Op0);
2793 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2794 if ((NumBits != 8 && NumBits != 16) ||
2795 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2796 return;
2797
2798 // The load must be an extending one and the constant must be within the
2799 // range of the unextended value.
2800 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2801 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2802 return;
2803 uint64_t Value = ConstOp1->getZExtValue();
2804 uint64_t Mask = (1 << NumBits) - 1;
2805 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2806 // Make sure that ConstOp1 is in range of C.Op0.
2807 int64_t SignedValue = ConstOp1->getSExtValue();
2808 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2809 return;
2810 if (C.ICmpType != SystemZICMP::SignedOnly) {
2811 // Unsigned comparison between two sign-extended values is equivalent
2812 // to unsigned comparison between two zero-extended values.
2813 Value &= Mask;
2814 } else if (NumBits == 8) {
2815 // Try to treat the comparison as unsigned, so that we can use CLI.
2816 // Adjust CCMask and Value as necessary.
2817 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2818 // Test whether the high bit of the byte is set.
2819 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2820 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2821 // Test whether the high bit of the byte is clear.
2822 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2823 else
2824 // No instruction exists for this combination.
2825 return;
2826 C.ICmpType = SystemZICMP::UnsignedOnly;
2827 }
2828 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2829 if (Value > Mask)
2830 return;
2831 // If the constant is in range, we can use any comparison.
2832 C.ICmpType = SystemZICMP::Any;
2833 } else
2834 return;
2835
2836 // Make sure that the first operand is an i32 of the right extension type.
2837 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2840 if (C.Op0.getValueType() != MVT::i32 ||
2841 Load->getExtensionType() != ExtType) {
2842 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2843 Load->getBasePtr(), Load->getPointerInfo(),
2844 Load->getMemoryVT(), Load->getAlign(),
2845 Load->getMemOperand()->getFlags());
2846 // Update the chain uses.
2847 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2848 }
2849
2850 // Make sure that the second operand is an i32 with the right value.
2851 if (C.Op1.getValueType() != MVT::i32 ||
2852 Value != ConstOp1->getZExtValue())
2853 C.Op1 = DAG.getConstant((uint32_t)Value, DL, MVT::i32);
2854}
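// Illustrative case: an equality test of a zero-extending i8 load against a
// constant such as 200 keeps the 8-bit value and is marked as
// SystemZICMP::Any, which allows the compare-immediate-with-memory forms
// (CLI and friends) mentioned above to be selected.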
2855
2856// Return true if Op is either an unextended load, or a load suitable
2857// for integer register-memory comparisons of type ICmpType.
2858static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2859 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2860 if (Load) {
2861 // There are no instructions to compare a register with a memory byte.
2862 if (Load->getMemoryVT() == MVT::i8)
2863 return false;
2864 // Otherwise decide on extension type.
2865 switch (Load->getExtensionType()) {
2866 case ISD::NON_EXTLOAD:
2867 return true;
2868 case ISD::SEXTLOAD:
2869 return ICmpType != SystemZICMP::UnsignedOnly;
2870 case ISD::ZEXTLOAD:
2871 return ICmpType != SystemZICMP::SignedOnly;
2872 default:
2873 break;
2874 }
2875 }
2876 return false;
2877}
2878
2879// Return true if it is better to swap the operands of C.
2880static bool shouldSwapCmpOperands(const Comparison &C) {
2881 // Leave i128 and f128 comparisons alone, since they have no memory forms.
2882 if (C.Op0.getValueType() == MVT::i128)
2883 return false;
2884 if (C.Op0.getValueType() == MVT::f128)
2885 return false;
2886
2887 // Always keep a floating-point constant second, since comparisons with
2888 // zero can use LOAD AND TEST and comparisons with other constants make a
2889 // natural memory operand.
2890 if (isa<ConstantFPSDNode>(C.Op1))
2891 return false;
2892
2893 // Never swap comparisons with zero since there are many ways to optimize
2894 // those later.
2895 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2896 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2897 return false;
2898
2899 // Also keep natural memory operands second if the loaded value is
2900 // only used here. Several comparisons have memory forms.
2901 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2902 return false;
2903
2904 // Look for cases where C.Op0 is a single-use load and C.Op1 isn't.
2905 // In that case we generally prefer the memory operand to be second.
2906 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2907 // The only exceptions are when the second operand is a constant and
2908 // we can use things like CHHSI.
2909 if (!ConstOp1)
2910 return true;
2911 // The unsigned memory-immediate instructions can handle 16-bit
2912 // unsigned integers.
2913 if (C.ICmpType != SystemZICMP::SignedOnly &&
2914 isUInt<16>(ConstOp1->getZExtValue()))
2915 return false;
2916 // The signed memory-immediate instructions can handle 16-bit
2917 // signed integers.
2918 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2919 isInt<16>(ConstOp1->getSExtValue()))
2920 return false;
2921 return true;
2922 }
2923
2924 // Try to promote the use of CGFR and CLGFR.
2925 unsigned Opcode0 = C.Op0.getOpcode();
2926 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2927 return true;
2928 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2929 return true;
2930 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
2931 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2932 C.Op0.getConstantOperandVal(1) == 0xffffffff)
2933 return true;
2934
2935 return false;
2936}
2937
2938// Check whether C tests for equality between X and Y and whether X - Y
2939// or Y - X is also computed. In that case it's better to compare the
2940// result of the subtraction against zero.
2942 Comparison &C) {
2943 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2944 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2945 for (SDNode *N : C.Op0->users()) {
2946 if (N->getOpcode() == ISD::SUB &&
2947 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2948 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2949 // Disable the nsw and nuw flags: the backend needs to handle
2950 // overflow as well during comparison elimination.
2951 N->dropFlags(SDNodeFlags::NoWrap);
2952 C.Op0 = SDValue(N, 0);
2953 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2954 return;
2955 }
2956 }
2957 }
2958}
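// For example, if both "a == b" and "a - b" are computed, the comparison is
// rewritten as "(a - b) == 0" so that the subtraction itself can set CC and
// the separate compare can later be eliminated.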
2959
2960// Check whether C compares a floating-point value with zero and whether that
2961// floating-point value is also negated. In this case we can use the
2962// negation to set CC, so avoiding separate LOAD AND TEST and
2963// LOAD (NEGATIVE/COMPLEMENT) instructions.
2964static void adjustForFNeg(Comparison &C) {
2965 // This optimization is invalid for strict comparisons, since FNEG
2966 // does not raise any exceptions.
2967 if (C.Chain)
2968 return;
2969 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2970 if (C1 && C1->isZero()) {
2971 for (SDNode *N : C.Op0->users()) {
2972 if (N->getOpcode() == ISD::FNEG) {
2973 C.Op0 = SDValue(N, 0);
2974 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2975 return;
2976 }
2977 }
2978 }
2979}
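// For example, when "x < 0.0" is tested and "-x" is also computed, the
// comparison is redirected to the negated value with the CC mask reversed
// ("-x > 0.0"), letting the CC-setting negation stand in for a separate
// compare.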
2980
2981// Check whether C compares (shl X, 32) with 0 and whether X is
2982// also sign-extended. In that case it is better to test the result
2983// of the sign extension using LTGFR.
2984//
2985// This case is important because InstCombine transforms a comparison
2986// with (sext (trunc X)) into a comparison with (shl X, 32).
2987static void adjustForLTGFR(Comparison &C) {
2988 // Check for a comparison between (shl X, 32) and 0.
2989 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
2990 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
2991 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2992 if (C1 && C1->getZExtValue() == 32) {
2993 SDValue ShlOp0 = C.Op0.getOperand(0);
2994 // See whether X has any SIGN_EXTEND_INREG uses.
2995 for (SDNode *N : ShlOp0->users()) {
2996 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2997 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2998 C.Op0 = SDValue(N, 0);
2999 return;
3000 }
3001 }
3002 }
3003 }
3004}
3005
3006// If C compares the truncation of an extending load, try to compare
3007// the untruncated value instead. This exposes more opportunities to
3008// reuse CC.
3009static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
3010 Comparison &C) {
3011 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
3012 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
3013 C.Op1.getOpcode() == ISD::Constant &&
3014 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3015 C.Op1->getAsZExtVal() == 0) {
3016 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
3017 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
3018 C.Op0.getValueSizeInBits().getFixedValue()) {
3019 unsigned Type = L->getExtensionType();
3020 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
3021 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
3022 C.Op0 = C.Op0.getOperand(0);
3023 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
3024 }
3025 }
3026 }
3027}
3028
3029// Return true if shift operation N has an in-range constant shift value.
3030// Store it in ShiftVal if so.
3031static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
3032 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
3033 if (!Shift)
3034 return false;
3035
3036 uint64_t Amount = Shift->getZExtValue();
3037 if (Amount >= N.getValueSizeInBits())
3038 return false;
3039
3040 ShiftVal = Amount;
3041 return true;
3042}
3043
3044// Check whether an AND with Mask is suitable for a TEST UNDER MASK
3045// instruction and whether the CC value is descriptive enough to handle
3046// a comparison of type Opcode between the AND result and CmpVal.
3047// CCMask says which comparison result is being tested and BitSize is
3048// the number of bits in the operands. If TEST UNDER MASK can be used,
3049// return the corresponding CC mask, otherwise return 0.
3050static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
3051 uint64_t Mask, uint64_t CmpVal,
3052 unsigned ICmpType) {
3053 assert(Mask != 0 && "ANDs with zero should have been removed by now");
3054
3055 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
3056 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
3057 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
3058 return 0;
3059
3060 // Work out the masks for the lowest and highest bits.
3062 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
3063
3064 // Signed ordered comparisons are effectively unsigned if the sign
3065 // bit is dropped.
3066 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
3067
3068 // Check for equality comparisons with 0, or the equivalent.
3069 if (CmpVal == 0) {
3070 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3072 if (CCMask == SystemZ::CCMASK_CMP_NE)
3074 }
3075 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
3076 if (CCMask == SystemZ::CCMASK_CMP_LT)
3078 if (CCMask == SystemZ::CCMASK_CMP_GE)
3080 }
3081 if (EffectivelyUnsigned && CmpVal < Low) {
3082 if (CCMask == SystemZ::CCMASK_CMP_LE)
3084 if (CCMask == SystemZ::CCMASK_CMP_GT)
3086 }
3087
3088 // Check for equality comparisons with the mask, or the equivalent.
3089 if (CmpVal == Mask) {
3090 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3092 if (CCMask == SystemZ::CCMASK_CMP_NE)
3094 }
3095 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
3096 if (CCMask == SystemZ::CCMASK_CMP_GT)
3098 if (CCMask == SystemZ::CCMASK_CMP_LE)
3100 }
3101 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
3102 if (CCMask == SystemZ::CCMASK_CMP_GE)
3104 if (CCMask == SystemZ::CCMASK_CMP_LT)
3106 }
3107
3108 // Check for ordered comparisons with the top bit.
3109 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
3110 if (CCMask == SystemZ::CCMASK_CMP_LE)
3112 if (CCMask == SystemZ::CCMASK_CMP_GT)
3114 }
3115 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
3116 if (CCMask == SystemZ::CCMASK_CMP_LT)
3118 if (CCMask == SystemZ::CCMASK_CMP_GE)
3120 }
3121
3122 // If there are just two bits, we can do equality checks for Low and High
3123 // as well.
3124 if (Mask == Low + High) {
3125 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
3127 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
3129 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
3131 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
3133 }
3134
3135 // Looks like we've exhausted our options.
3136 return 0;
3137}
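// As an illustration, the unsigned test "(x & 0xF000) < 0x1000" asks whether
// all of the masked bits are zero, which is exactly the kind of question
// TEST UNDER MASK answers, so a suitable CC mask exists for it.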
3138
3139// See whether C can be implemented as a TEST UNDER MASK instruction.
3140// Update the arguments with the TM version if so.
3142 Comparison &C) {
3143 // Use VECTOR TEST UNDER MASK for i128 operations.
3144 if (C.Op0.getValueType() == MVT::i128) {
3145 // We can use VTM for EQ/NE comparisons of x & y against 0.
3146 if (C.Op0.getOpcode() == ISD::AND &&
3147 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3148 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
3149 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
3150 if (Mask && Mask->getAPIntValue() == 0) {
3151 C.Opcode = SystemZISD::VTM;
3152 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
3153 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
3154 C.CCValid = SystemZ::CCMASK_VCMP;
3155 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3156 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3157 else
3158 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3159 }
3160 }
3161 return;
3162 }
3163
3164 // Check that we have a comparison with a constant.
3165 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3166 if (!ConstOp1)
3167 return;
3168 uint64_t CmpVal = ConstOp1->getZExtValue();
3169
3170 // Check whether the nonconstant input is an AND with a constant mask.
3171 Comparison NewC(C);
3172 uint64_t MaskVal;
3173 ConstantSDNode *Mask = nullptr;
3174 if (C.Op0.getOpcode() == ISD::AND) {
3175 NewC.Op0 = C.Op0.getOperand(0);
3176 NewC.Op1 = C.Op0.getOperand(1);
3177 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
3178 if (!Mask)
3179 return;
3180 MaskVal = Mask->getZExtValue();
3181 } else {
3182 // There is no instruction to compare with a 64-bit immediate
3183 // so use TMHH instead if possible. We need an unsigned ordered
3184 // comparison with an i64 immediate.
3185 if (NewC.Op0.getValueType() != MVT::i64 ||
3186 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
3187 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
3188 NewC.ICmpType == SystemZICMP::SignedOnly)
3189 return;
3190 // Convert LE and GT comparisons into LT and GE.
3191 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
3192 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
3193 if (CmpVal == uint64_t(-1))
3194 return;
3195 CmpVal += 1;
3196 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
3197 }
3198 // If the low N bits of Op1 are zero, then the low N bits of Op0 can
3199 // be masked off without changing the result.
3200 MaskVal = -(CmpVal & -CmpVal);
3201 NewC.ICmpType = SystemZICMP::UnsignedOnly;
3202 }
3203 if (!MaskVal)
3204 return;
3205
3206 // Check whether the combination of mask, comparison value and comparison
3207 // type are suitable.
3208 unsigned BitSize = NewC.Op0.getValueSizeInBits();
3209 unsigned NewCCMask, ShiftVal;
3210 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3211 NewC.Op0.getOpcode() == ISD::SHL &&
3212 isSimpleShift(NewC.Op0, ShiftVal) &&
3213 (MaskVal >> ShiftVal != 0) &&
3214 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
3215 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3216 MaskVal >> ShiftVal,
3217 CmpVal >> ShiftVal,
3218 SystemZICMP::Any))) {
3219 NewC.Op0 = NewC.Op0.getOperand(0);
3220 MaskVal >>= ShiftVal;
3221 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3222 NewC.Op0.getOpcode() == ISD::SRL &&
3223 isSimpleShift(NewC.Op0, ShiftVal) &&
3224 (MaskVal << ShiftVal != 0) &&
3225 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
3226 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3227 MaskVal << ShiftVal,
3228 CmpVal << ShiftVal,
3230 NewC.Op0 = NewC.Op0.getOperand(0);
3231 MaskVal <<= ShiftVal;
3232 } else {
3233 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
3234 NewC.ICmpType);
3235 if (!NewCCMask)
3236 return;
3237 }
3238
3239 // Go ahead and make the change.
3240 C.Opcode = SystemZISD::TM;
3241 C.Op0 = NewC.Op0;
3242 if (Mask && Mask->getZExtValue() == MaskVal)
3243 C.Op1 = SDValue(Mask, 0);
3244 else
3245 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
3246 C.CCValid = SystemZ::CCMASK_TM;
3247 C.CCMask = NewCCMask;
3248}
3249
3250// Implement i128 comparison in vector registers.
3251static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
3252 Comparison &C) {
3253 if (C.Opcode != SystemZISD::ICMP)
3254 return;
3255 if (C.Op0.getValueType() != MVT::i128)
3256 return;
3257 if (DAG.getSubtarget<SystemZSubtarget>().hasVectorEnhancements3())
3258 return;
3259
3260 // (In-)Equality comparisons can be implemented via VCEQGS.
3261 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3262 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3263 C.Opcode = SystemZISD::VICMPES;
3264 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
3265 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
3266 C.CCValid = SystemZ::CCMASK_VCMP;
3267 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3268 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3269 else
3270 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3271 return;
3272 }
3273
3274 // Normalize other comparisons to GT.
3275 bool Swap = false, Invert = false;
3276 switch (C.CCMask) {
3277 case SystemZ::CCMASK_CMP_GT: break;
3278 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
3279 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
3280 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
3281 default: llvm_unreachable("Invalid integer condition!");
3282 }
3283 if (Swap)
3284 std::swap(C.Op0, C.Op1);
3285
3286 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3287 C.Opcode = SystemZISD::UCMP128HI;
3288 else
3289 C.Opcode = SystemZISD::SCMP128HI;
3290 C.CCValid = SystemZ::CCMASK_ANY;
3291 C.CCMask = SystemZ::CCMASK_1;
3292
3293 if (Invert)
3294 C.CCMask ^= C.CCValid;
3295}
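// For example, a signed i128 "a <= b" is lowered as the inverse of "a > b",
// while "a < b" becomes "b > a" by swapping the operands; only the
// greater-than primitives (SCMP128HI/UCMP128HI) are needed.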
3296
3297// See whether the comparison argument contains a redundant AND
3298// and remove it if so. This sometimes happens due to the generic
3299// BRCOND expansion.
3301 Comparison &C) {
3302 if (C.Op0.getOpcode() != ISD::AND)
3303 return;
3304 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3305 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3306 return;
3307 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3308 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3309 return;
3310
3311 C.Op0 = C.Op0.getOperand(0);
3312}
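// For example, when BRCOND expansion produces "(x & 1) != 0" but
// computeKnownBits already shows that x has no bits set outside the mask,
// the AND contributes nothing and the comparison can test x directly.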
3313
3314// Return a Comparison that tests the condition-code result of intrinsic
3315// node Call against constant integer CC using comparison code Cond.
3316// Opcode is the opcode of the SystemZISD operation for the intrinsic
3317// and CCValid is the set of possible condition-code results.
3318static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3319 SDValue Call, unsigned CCValid, uint64_t CC,
3321 Comparison C(Call, SDValue(), SDValue());
3322 C.Opcode = Opcode;
3323 C.CCValid = CCValid;
3324 if (Cond == ISD::SETEQ)
3325 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3326 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3327 else if (Cond == ISD::SETNE)
3328 // ...and the inverse of that.
3329 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3330 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3331 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3332 // always true for CC>3.
3333 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3334 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3335 // ...and the inverse of that.
3336 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3337 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3338 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3339 // always true for CC>3.
3340 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3341 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3342 // ...and the inverse of that.
3343 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3344 else
3345 llvm_unreachable("Unexpected integer comparison type");
3346 C.CCMask &= CCValid;
3347 return C;
3348}
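// Worked example: for Cond == ISD::SETEQ and CC == 1 the mask is
// 1 << (3 - 1) == 4, i.e. the single CC-mask bit corresponding to condition
// code 1; ISD::SETNE with the same CC yields the complementary bits within
// CCValid.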
3349
3350// Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3351static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3352 ISD::CondCode Cond, const SDLoc &DL,
3353 SDValue Chain = SDValue(),
3354 bool IsSignaling = false) {
3355 if (CmpOp1.getOpcode() == ISD::Constant) {
3356 assert(!Chain);
3357 unsigned Opcode, CCValid;
3358 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3359 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3360 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3361 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3362 CmpOp1->getAsZExtVal(), Cond);
3363 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3364 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3365 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3366 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3367 CmpOp1->getAsZExtVal(), Cond);
3368 }
3369 Comparison C(CmpOp0, CmpOp1, Chain);
3370 C.CCMask = CCMaskForCondCode(Cond);
3371 if (C.Op0.getValueType().isFloatingPoint()) {
3372 C.CCValid = SystemZ::CCMASK_FCMP;
3373 if (!C.Chain)
3374 C.Opcode = SystemZISD::FCMP;
3375 else if (!IsSignaling)
3376 C.Opcode = SystemZISD::STRICT_FCMP;
3377 else
3378 C.Opcode = SystemZISD::STRICT_FCMPS;
3380 } else {
3381 assert(!C.Chain);
3382 C.CCValid = SystemZ::CCMASK_ICMP;
3383 C.Opcode = SystemZISD::ICMP;
3384 // Choose the type of comparison. Equality and inequality tests can
3385 // use either signed or unsigned comparisons. The choice also doesn't
3386 // matter if both sign bits are known to be clear. In those cases we
3387 // want to give the main isel code the freedom to choose whichever
3388 // form fits best.
3389 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3390 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3391 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3392 C.ICmpType = SystemZICMP::Any;
3393 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3394 C.ICmpType = SystemZICMP::UnsignedOnly;
3395 else
3396 C.ICmpType = SystemZICMP::SignedOnly;
3397 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3398 adjustForRedundantAnd(DAG, DL, C);
3399 adjustZeroCmp(DAG, DL, C);
3400 adjustSubwordCmp(DAG, DL, C);
3401 adjustForSubtraction(DAG, DL, C);
3403 adjustICmpTruncate(DAG, DL, C);
3404 }
3405
3406 if (shouldSwapCmpOperands(C)) {
3407 std::swap(C.Op0, C.Op1);
3408 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3409 }
3410
3412 adjustICmp128(DAG, DL, C);
3413 return C;
3414}
3415
3416// Emit the comparison instruction described by C.
3417static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3418 if (!C.Op1.getNode()) {
3419 SDNode *Node;
3420 switch (C.Op0.getOpcode()) {
3422 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3423 return SDValue(Node, 0);
3425 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3426 return SDValue(Node, Node->getNumValues() - 1);
3427 default:
3428 llvm_unreachable("Invalid comparison operands");
3429 }
3430 }
3431 if (C.Opcode == SystemZISD::ICMP)
3432 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3433 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3434 if (C.Opcode == SystemZISD::TM) {
3435 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3437 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3438 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3439 }
3440 if (C.Opcode == SystemZISD::VICMPES) {
3441 SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32);
3442 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3443 return SDValue(Val.getNode(), 1);
3444 }
3445 if (C.Chain) {
3446 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3447 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3448 }
3449 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3450}
3451
3452// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3453// 64 bits. Extend is the extension type to use. Store the high part
3454// in Hi and the low part in Lo.
3455static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3456 SDValue Op0, SDValue Op1, SDValue &Hi,
3457 SDValue &Lo) {
3458 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3459 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3460 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3461 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3462 DAG.getConstant(32, DL, MVT::i64));
3463 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3464 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3465}
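// For example, a 32-bit UMUL_LOHI would use ISD::ZERO_EXTEND here: both
// operands are widened to i64 and multiplied, then bits 63..32 of the
// product become Hi and bits 31..0 become Lo.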
3466
3467// Lower a binary operation that produces two VT results, one in each
3468// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3469// and Opcode performs the GR128 operation. Store the even register result
3470// in Even and the odd register result in Odd.
3471static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3472 unsigned Opcode, SDValue Op0, SDValue Op1,
3473 SDValue &Even, SDValue &Odd) {
3474 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3475 bool Is32Bit = is32Bit(VT);
3476 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3477 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3478}
3479
3480// Return an i32 value that is 1 if the CC value produced by CCReg is
3481// in the mask CCMask and 0 otherwise. CC is known to have a value
3482// in CCValid, so other values can be ignored.
3483static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3484 unsigned CCValid, unsigned CCMask) {
3485 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3486 DAG.getConstant(0, DL, MVT::i32),
3487 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3488 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3489 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3490}
3491
3492// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3493// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3494// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3495// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3496// floating-point comparisons.
3499 switch (CC) {
3500 case ISD::SETOEQ:
3501 case ISD::SETEQ:
3502 switch (Mode) {
3503 case CmpMode::Int: return SystemZISD::VICMPE;
3504 case CmpMode::FP: return SystemZISD::VFCMPE;
3505 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3506 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3507 }
3508 llvm_unreachable("Bad mode");
3509
3510 case ISD::SETOGE:
3511 case ISD::SETGE:
3512 switch (Mode) {
3513 case CmpMode::Int: return 0;
3514 case CmpMode::FP: return SystemZISD::VFCMPHE;
3515 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3516 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3517 }
3518 llvm_unreachable("Bad mode");
3519
3520 case ISD::SETOGT:
3521 case ISD::SETGT:
3522 switch (Mode) {
3523 case CmpMode::Int: return SystemZISD::VICMPH;
3524 case CmpMode::FP: return SystemZISD::VFCMPH;
3525 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3526 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3527 }
3528 llvm_unreachable("Bad mode");
3529
3530 case ISD::SETUGT:
3531 switch (Mode) {
3532 case CmpMode::Int: return SystemZISD::VICMPHL;
3533 case CmpMode::FP: return 0;
3534 case CmpMode::StrictFP: return 0;
3535 case CmpMode::SignalingFP: return 0;
3536 }
3537 llvm_unreachable("Bad mode");
3538
3539 default:
3540 return 0;
3541 }
3542}
3543
3544// Return the SystemZISD vector comparison operation for CC or its inverse,
3545// or 0 if neither can be done directly. Indicate in Invert whether the
3546// result is for the inverse of CC. Mode is as above.
3548 bool &Invert) {
3549 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3550 Invert = false;
3551 return Opcode;
3552 }
3553
3554 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3555 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3556 Invert = true;
3557 return Opcode;
3558 }
3559
3560 return 0;
3561}
3562
3563// Return a v2f64 that contains the extended form of elements Start and Start+1
3564// of v4f32 value Op. If Chain is nonnull, return the strict form.
3565static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3566 SDValue Op, SDValue Chain) {
3567 int Mask[] = { Start, -1, Start + 1, -1 };
3568 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3569 if (Chain) {
3570 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3571 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3572 }
3573 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3574}
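// The shuffle above places elements Start and Start+1 into the even lanes
// (indices 0 and 2) with the odd lanes undefined, which is presumably the
// layout consumed by the VEXTEND / STRICT_VEXTEND widening operation.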
3575
3576// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3577// producing a result of type VT. If Chain is nonnull, return the strict form.
3578SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3579 const SDLoc &DL, EVT VT,
3580 SDValue CmpOp0,
3581 SDValue CmpOp1,
3582 SDValue Chain) const {
3583 // There is no hardware support for v4f32 (unless we have the vector
3584 // enhancements facility 1), so extend the vector into two v2f64s
3585 // and compare those.
3586 if (CmpOp0.getValueType() == MVT::v4f32 &&
3587 !Subtarget.hasVectorEnhancements1()) {
3588 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3589 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3590 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3591 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3592 if (Chain) {
3593 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3594 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3595 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3596 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3597 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3598 H1.getValue(1), L1.getValue(1),
3599 HRes.getValue(1), LRes.getValue(1) };
3600 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3601 SDValue Ops[2] = { Res, NewChain };
3602 return DAG.getMergeValues(Ops, DL);
3603 }
3604 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3605 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3606 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3607 }
3608 if (Chain) {
3609 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3610 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3611 }
3612 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3613}
3614
3615// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3616// an integer mask of type VT. If Chain is nonnull, we have a strict
3617// floating-point comparison. If in addition IsSignaling is true, we have
3618// a strict signaling floating-point comparison.
3619SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3620 const SDLoc &DL, EVT VT,
3622 SDValue CmpOp0,
3623 SDValue CmpOp1,
3624 SDValue Chain,
3625 bool IsSignaling) const {
3626 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3627 assert (!Chain || IsFP);
3628 assert (!IsSignaling || Chain);
3629 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3630 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3631 bool Invert = false;
3632 SDValue Cmp;
3633 switch (CC) {
3634 // Handle tests for order using (or (ogt y x) (oge x y)).
3635 case ISD::SETUO:
3636 Invert = true;
3637 [[fallthrough]];
3638 case ISD::SETO: {
3639 assert(IsFP && "Unexpected integer comparison");
3640 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3641 DL, VT, CmpOp1, CmpOp0, Chain);
3642 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3643 DL, VT, CmpOp0, CmpOp1, Chain);
3644 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3645 if (Chain)
3646 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3647 LT.getValue(1), GE.getValue(1));
3648 break;
3649 }
3650
3651 // Handle <> tests using (or (ogt y x) (ogt x y)).
3652 case ISD::SETUEQ:
3653 Invert = true;
3654 [[fallthrough]];
3655 case ISD::SETONE: {
3656 assert(IsFP && "Unexpected integer comparison");
3657 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3658 DL, VT, CmpOp1, CmpOp0, Chain);
3659 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3660 DL, VT, CmpOp0, CmpOp1, Chain);
3661 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3662 if (Chain)
3663 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3664 LT.getValue(1), GT.getValue(1));
3665 break;
3666 }
3667
3668 // Otherwise a single comparison is enough. It doesn't really
3669 // matter whether we try the inversion or the swap first, since
3670 // there are no cases where both work.
3671 default:
3672 // Optimize sign-bit comparisons to signed compares.
3673 if (Mode == CmpMode::Int && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
3674 ISD::isConstantSplatVectorAllZeros(CmpOp1.getNode())) {
3675 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3676 APInt Mask;
3677 if (CmpOp0.getOpcode() == ISD::AND
3678 && ISD::isConstantSplatVector(CmpOp0.getOperand(1).getNode(), Mask)
3679 && Mask == APInt::getSignMask(EltSize)) {
3680 CC = (CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
3681 CmpOp0 = CmpOp0.getOperand(0);
3682 }
3683 }
3684 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3685 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3686 else {
3687 CC = ISD::getSetCCSwappedOperands(CC);
3688 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3689 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3690 else
3691 llvm_unreachable("Unhandled comparison");
3692 }
3693 if (Chain)
3694 Chain = Cmp.getValue(1);
3695 break;
3696 }
3697 if (Invert) {
3698 SDValue Mask =
3699 DAG.getSplatBuildVector(VT, DL, DAG.getAllOnesConstant(DL, MVT::i64));
3700 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3701 }
3702 if (Chain && Chain.getNode() != Cmp.getNode()) {
3703 SDValue Ops[2] = { Cmp, Chain };
3704 Cmp = DAG.getMergeValues(Ops, DL);
3705 }
3706 return Cmp;
3707}
3708
3709SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3710 SelectionDAG &DAG) const {
3711 SDValue CmpOp0 = Op.getOperand(0);
3712 SDValue CmpOp1 = Op.getOperand(1);
3713 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3714 SDLoc DL(Op);
3715 EVT VT = Op.getValueType();
3716 if (VT.isVector())
3717 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3718
3719 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3720 SDValue CCReg = emitCmp(DAG, DL, C);
3721 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3722}
3723
3724SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3725 SelectionDAG &DAG,
3726 bool IsSignaling) const {
3727 SDValue Chain = Op.getOperand(0);
3728 SDValue CmpOp0 = Op.getOperand(1);
3729 SDValue CmpOp1 = Op.getOperand(2);
3730 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3731 SDLoc DL(Op);
3732 EVT VT = Op.getNode()->getValueType(0);
3733 if (VT.isVector()) {
3734 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3735 Chain, IsSignaling);
3736 return Res.getValue(Op.getResNo());
3737 }
3738
3739 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3740 SDValue CCReg = emitCmp(DAG, DL, C);
3741 CCReg->setFlags(Op->getFlags());
3742 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3743 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3744 return DAG.getMergeValues(Ops, DL);
3745}
3746
3747SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3748 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3749 SDValue CmpOp0 = Op.getOperand(2);
3750 SDValue CmpOp1 = Op.getOperand(3);
3751 SDValue Dest = Op.getOperand(4);
3752 SDLoc DL(Op);
3753
3754 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3755 SDValue CCReg = emitCmp(DAG, DL, C);
3756 return DAG.getNode(
3757 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3758 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3759 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3760}
3761
3762// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3763// allowing Pos and Neg to be wider than CmpOp.
3764static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3765 return (Neg.getOpcode() == ISD::SUB &&
3766 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3767 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3768 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3769 Pos.getOperand(0) == CmpOp)));
3770}
3771
3772// Return the absolute or negative absolute of Op; IsNegative decides which.
3773static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3774 bool IsNegative) {
3775 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3776 if (IsNegative)
3777 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3778 DAG.getConstant(0, DL, Op.getValueType()), Op);
3779 return Op;
3780}
3781
3782static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL,
3783 Comparison C, SDValue TrueOp, SDValue FalseOp) {
3784 EVT VT = MVT::i128;
3785 unsigned Op;
3786
3787 if (C.CCMask == SystemZ::CCMASK_CMP_NE ||
3788 C.CCMask == SystemZ::CCMASK_CMP_GE ||
3789 C.CCMask == SystemZ::CCMASK_CMP_LE) {
3790 std::swap(TrueOp, FalseOp);
3791 C.CCMask ^= C.CCValid;
3792 }
3793 if (C.CCMask == SystemZ::CCMASK_CMP_LT) {
3794 std::swap(C.Op0, C.Op1);
3795 C.CCMask = SystemZ::CCMASK_CMP_GT;
3796 }
3797 switch (C.CCMask) {
3798 case SystemZ::CCMASK_CMP_EQ:
3799 Op = SystemZISD::VICMPE;
3800 break;
3801 case SystemZ::CCMASK_CMP_GT:
3802 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3803 Op = SystemZISD::VICMPHL;
3804 else
3805 Op = SystemZISD::VICMPH;
3806 break;
3807 default:
3808 llvm_unreachable("Unhandled comparison");
3809 break;
3810 }
3811
3812 SDValue Mask = DAG.getNode(Op, DL, VT, C.Op0, C.Op1);
3813 TrueOp = DAG.getNode(ISD::AND, DL, VT, TrueOp, Mask);
3814 FalseOp = DAG.getNode(ISD::AND, DL, VT, FalseOp, DAG.getNOT(DL, Mask, VT));
3815 return DAG.getNode(ISD::OR, DL, VT, TrueOp, FalseOp);
3816}
3817
3818SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3819 SelectionDAG &DAG) const {
3820 SDValue CmpOp0 = Op.getOperand(0);
3821 SDValue CmpOp1 = Op.getOperand(1);
3822 SDValue TrueOp = Op.getOperand(2);
3823 SDValue FalseOp = Op.getOperand(3);
3824 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3825 SDLoc DL(Op);
3826
3827 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3828
3829 // Check for absolute and negative-absolute selections, including those
3830 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3831 // This check supplements the one in DAGCombiner.
3832 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3833 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3834 C.Op1.getOpcode() == ISD::Constant &&
3835 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3836 C.Op1->getAsZExtVal() == 0) {
3837 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3838 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3839 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3840 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3841 }
3842
3843 if (Subtarget.hasVectorEnhancements3() &&
3844 C.Opcode == SystemZISD::ICMP &&
3845 C.Op0.getValueType() == MVT::i128 &&
3846 TrueOp.getValueType() == MVT::i128) {
3847 return getI128Select(DAG, DL, C, TrueOp, FalseOp);
3848 }
3849
3850 SDValue CCReg = emitCmp(DAG, DL, C);
3851 SDValue Ops[] = {TrueOp, FalseOp,
3852 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3853 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3854
3855 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3856}
3857
3858SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3859 SelectionDAG &DAG) const {
3860 SDLoc DL(Node);
3861 const GlobalValue *GV = Node->getGlobal();
3862 int64_t Offset = Node->getOffset();
3863 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3864 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3865
3866 SDValue Result;
3867 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3868 if (isInt<32>(Offset)) {
3869 // Assign anchors at 1<<12 byte boundaries.
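      // For example, an offset of 0x12346 is split into the anchor GV+0x12000
      // plus a remainder of 0x346; because the remainder is even (halfword
      // aligned) it can be folded into the pc-relative address below.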
3870 uint64_t Anchor = Offset & ~uint64_t(0xfff);
3871 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3872 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3873
3874 // The offset can be folded into the address if it is aligned to a
3875 // halfword.
3876 Offset -= Anchor;
3877 if (Offset != 0 && (Offset & 1) == 0) {
3878 SDValue Full =
3879 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3880 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3881 Offset = 0;
3882 }
3883 } else {
3884 // Conservatively load a constant offset greater than 32 bits into a
3885 // register below.
3886 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3887 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3888 }
3889 } else if (Subtarget.isTargetELF()) {
3890 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3891 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3892 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3893 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3894 } else if (Subtarget.isTargetzOS()) {
3895 Result = getADAEntry(DAG, GV, DL, PtrVT);
3896 } else
3897 llvm_unreachable("Unexpected Subtarget");
3898
3899 // If there was a non-zero offset that we didn't fold, create an explicit
3900 // addition for it.
3901 if (Offset != 0)
3902 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3903 DAG.getSignedConstant(Offset, DL, PtrVT));
3904
3905 return Result;
3906}
3907
3908SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3909 SelectionDAG &DAG,
3910 unsigned Opcode,
3911 SDValue GOTOffset) const {
3912 SDLoc DL(Node);
3913 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3914 SDValue Chain = DAG.getEntryNode();
3915 SDValue Glue;
3916
3917 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3918 CallingConv::GHC)
3919 report_fatal_error("In GHC calling convention TLS is not supported");
3920
3921 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3922 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3923 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3924 Glue = Chain.getValue(1);
3925 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3926 Glue = Chain.getValue(1);
3927
3928 // The first call operand is the chain and the second is the TLS symbol.
3929 SmallVector<SDValue, 8> Ops;
3930 Ops.push_back(Chain);
3931 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3932 Node->getValueType(0),
3933 0, 0));
3934
3935 // Add argument registers to the end of the list so that they are
3936 // known live into the call.
3937 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3938 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3939
3940 // Add a register mask operand representing the call-preserved registers.
3941 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3942 const uint32_t *Mask =
3943 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3944 assert(Mask && "Missing call preserved mask for calling convention");
3945 Ops.push_back(DAG.getRegisterMask(Mask));
3946
3947 // Glue the call to the argument copies.
3948 Ops.push_back(Glue);
3949
3950 // Emit the call.
3951 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3952 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3953 Glue = Chain.getValue(1);
3954
3955 // Copy the return value from %r2.
3956 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3957}
3958
3959SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3960 SelectionDAG &DAG) const {
3961 SDValue Chain = DAG.getEntryNode();
3962 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3963
3964 // The high part of the thread pointer is in access register 0.
3965 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3966 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3967
3968 // The low part of the thread pointer is in access register 1.
3969 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3970 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3971
3972 // Merge them into a single 64-bit address.
3973 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3974 DAG.getConstant(32, DL, PtrVT));
3975 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3976}
3977
3978SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3979 SelectionDAG &DAG) const {
3980 if (DAG.getTarget().useEmulatedTLS())
3981 return LowerToTLSEmulatedModel(Node, DAG);
3982 SDLoc DL(Node);
3983 const GlobalValue *GV = Node->getGlobal();
3984 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3985 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3986
3987 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3988 CallingConv::GHC)
3989 report_fatal_error("In GHC calling convention TLS is not supported");
3990
3991 SDValue TP = lowerThreadPointer(DL, DAG);
3992
3993 // Get the offset of GA from the thread pointer, based on the TLS model.
3994 SDValue Offset;
3995 switch (model) {
3996 case TLSModel::GeneralDynamic: {
3997 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3998 SystemZConstantPoolValue *CPV =
3999 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
4000
4001 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4002 Offset = DAG.getLoad(
4003 PtrVT, DL, DAG.getEntryNode(), Offset,
4004 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4005
4006 // Call __tls_get_offset to retrieve the offset.
4007 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
4008 break;
4009 }
4010
4011 case TLSModel::LocalDynamic: {
4012 // Load the GOT offset of the module ID.
4013 SystemZConstantPoolValue *CPV =
4014 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
4015
4016 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4017 Offset = DAG.getLoad(
4018 PtrVT, DL, DAG.getEntryNode(), Offset,
4019 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4020
4021 // Call __tls_get_offset to retrieve the module base offset.
4022 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
4023
4024 // Note: The SystemZLDCleanupPass will remove redundant computations
4025 // of the module base offset. Count total number of local-dynamic
4026 // accesses to trigger execution of that pass.
4027 SystemZMachineFunctionInfo* MFI =
4028 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
4029 MFI->incNumLocalDynamicTLSAccesses();
4030
4031 // Add the per-symbol offset.
4032 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
4033
4034 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4035 DTPOffset = DAG.getLoad(
4036 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
4037 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4038
4039 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
4040 break;
4041 }
4042
4043 case TLSModel::InitialExec: {
4044 // Load the offset from the GOT.
4045 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
4046 SystemZII::MO_INDNTPOFF);
4047 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
4048 Offset =
4049 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
4050 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4051 break;
4052 }
4053
4054 case TLSModel::LocalExec: {
4055 // Force the offset into the constant pool and load it from there.
4056 SystemZConstantPoolValue *CPV =
4057 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
4058
4059 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4060 Offset = DAG.getLoad(
4061 PtrVT, DL, DAG.getEntryNode(), Offset,
4062 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4063 break;
4064 }
4065 }
4066
4067 // Add the base and offset together.
4068 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
4069}
4070
4071SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
4072 SelectionDAG &DAG) const {
4073 SDLoc DL(Node);
4074 const BlockAddress *BA = Node->getBlockAddress();
4075 int64_t Offset = Node->getOffset();
4076 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4077
4078 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
4079 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4080 return Result;
4081}
4082
4083SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
4084 SelectionDAG &DAG) const {
4085 SDLoc DL(JT);
4086 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4087 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
4088
4089 // Use LARL to load the address of the table.
4090 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4091}
4092
4093SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
4094 SelectionDAG &DAG) const {
4095 SDLoc DL(CP);
4096 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4097
4098 SDValue Result;
4099 if (CP->isMachineConstantPoolEntry())
4100 Result =
4101 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
4102 else
4103 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
4104 CP->getOffset());
4105
4106 // Use LARL to load the address of the constant pool entry.
4107 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4108}
4109
4110SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
4111 SelectionDAG &DAG) const {
4112 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4113 MachineFunction &MF = DAG.getMachineFunction();
4114 MachineFrameInfo &MFI = MF.getFrameInfo();
4115 MFI.setFrameAddressIsTaken(true);
4116
4117 SDLoc DL(Op);
4118 unsigned Depth = Op.getConstantOperandVal(0);
4119 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4120
4121 // By definition, the frame address is the address of the back chain. (In
4122 // the case of packed stack without backchain, return the address where the
4123 // backchain would have been stored. This will either be an unused space or
4124 // contain a saved register).
4125 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
4126 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
4127
4128 if (Depth > 0) {
4129 // FIXME The frontend should detect this case.
4130 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4131 report_fatal_error("Unsupported stack frame traversal count");
4132
4133 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
4134 while (Depth--) {
4135 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
4136 MachinePointerInfo());
4137 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
4138 }
4139 }
4140
4141 return BackChain;
4142}
4143
4144SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
4145 SelectionDAG &DAG) const {
4146 MachineFunction &MF = DAG.getMachineFunction();
4147 MachineFrameInfo &MFI = MF.getFrameInfo();
4148 MFI.setReturnAddressIsTaken(true);
4149
4150 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
4151 return SDValue();
4152
4153 SDLoc DL(Op);
4154 unsigned Depth = Op.getConstantOperandVal(0);
4155 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4156
4157 if (Depth > 0) {
4158 // FIXME The frontend should detect this case.
4159 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4160 report_fatal_error("Unsupported stack frame traversal count");
4161
4162 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
4163 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4164 int Offset = TFL->getReturnAddressOffset(MF);
4165 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
4166 DAG.getSignedConstant(Offset, DL, PtrVT));
4167 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
4168 MachinePointerInfo());
4169 }
4170
4171 // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an
4172 // implicit live-in.
4173 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
4174 Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(),
4175 &SystemZ::GR64BitRegClass);
4176 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
4177}
4178
4179SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
4180 SelectionDAG &DAG) const {
4181 SDLoc DL(Op);
4182 SDValue In = Op.getOperand(0);
4183 EVT InVT = In.getValueType();
4184 EVT ResVT = Op.getValueType();
4185
4186 // Convert loads directly. This is normally done by DAGCombiner,
4187 // but we need this case for bitcasts that are created during lowering
4188 // and which are then lowered themselves.
4189 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
4190 if (ISD::isNormalLoad(LoadN)) {
4191 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
4192 LoadN->getBasePtr(), LoadN->getMemOperand());
4193 // Update the chain uses.
4194 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
4195 return NewLoad;
4196 }
4197
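  // The cases below rely on f32 values living in the high 32 bits of a
  // 64-bit floating-point register: the i32 is moved into the high word of
  // a 64-bit value (directly with high-word support, otherwise via a shift),
  // bitcast, and then read back out of the high subregister, and vice versa.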
4198 if (InVT == MVT::i32 && ResVT == MVT::f32) {
4199 SDValue In64;
4200 if (Subtarget.hasHighWord()) {
4201 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
4202 MVT::i64);
4203 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4204 MVT::i64, SDValue(U64, 0), In);
4205 } else {
4206 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
4207 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
4208 DAG.getConstant(32, DL, MVT::i64));
4209 }
4210 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
4211 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
4212 DL, MVT::f32, Out64);
4213 }
4214 if (InVT == MVT::f32 && ResVT == MVT::i32) {
4215 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
4216 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4217 MVT::f64, SDValue(U64, 0), In);
4218 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
4219 if (Subtarget.hasHighWord())
4220 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
4221 MVT::i32, Out64);
4222 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
4223 DAG.getConstant(32, DL, MVT::i64));
4224 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
4225 }
4226 llvm_unreachable("Unexpected bitcast combination");
4227}
4228
4229SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
4230 SelectionDAG &DAG) const {
4231
4232 if (Subtarget.isTargetXPLINK64())
4233 return lowerVASTART_XPLINK(Op, DAG);
4234 else
4235 return lowerVASTART_ELF(Op, DAG);
4236}
4237
4238SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
4239 SelectionDAG &DAG) const {
4240 MachineFunction &MF = DAG.getMachineFunction();
4241 SystemZMachineFunctionInfo *FuncInfo =
4242 MF.getInfo<SystemZMachineFunctionInfo>();
4243
4244 SDLoc DL(Op);
4245
4246 // vastart just stores the address of the VarArgsFrameIndex slot into the
4247 // memory location argument.
4248 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4249 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4250 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4251 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4252 MachinePointerInfo(SV));
4253}
4254
4255SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
4256 SelectionDAG &DAG) const {
4257 MachineFunction &MF = DAG.getMachineFunction();
4258 SystemZMachineFunctionInfo *FuncInfo =
4259 MF.getInfo<SystemZMachineFunctionInfo>();
4260 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4261
4262 SDValue Chain = Op.getOperand(0);
4263 SDValue Addr = Op.getOperand(1);
4264 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4265 SDLoc DL(Op);
4266
4267 // The initial values of each field.
4268 const unsigned NumFields = 4;
4269 SDValue Fields[NumFields] = {
4270 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
4271 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
4272 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
4273 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
4274 };
4275
4276 // Store each field into its respective slot.
4277 SDValue MemOps[NumFields];
4278 unsigned Offset = 0;
4279 for (unsigned I = 0; I < NumFields; ++I) {
4280 SDValue FieldAddr = Addr;
4281 if (Offset != 0)
4282 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
4283 DAG.getIntPtrConstant(Offset, DL));
4284 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
4285 MachinePointerInfo(SV, Offset));
4286 Offset += 8;
4287 }
4288 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4289}
4290
4291SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
4292 SelectionDAG &DAG) const {
4293 SDValue Chain = Op.getOperand(0);
4294 SDValue DstPtr = Op.getOperand(1);
4295 SDValue SrcPtr = Op.getOperand(2);
4296 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4297 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4298 SDLoc DL(Op);
4299
4300 uint32_t Sz =
4301 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
4302 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
4303 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
4304 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV),
4305 MachinePointerInfo(SrcSV));
4306}
4307
4308SDValue
4309SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
4310 SelectionDAG &DAG) const {
4311 if (Subtarget.isTargetXPLINK64())
4312 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
4313 else
4314 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
4315}
4316
4317SDValue
4318SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
4319 SelectionDAG &DAG) const {
4320 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4321 MachineFunction &MF = DAG.getMachineFunction();
4322 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4323 SDValue Chain = Op.getOperand(0);
4324 SDValue Size = Op.getOperand(1);
4325 SDValue Align = Op.getOperand(2);
4326 SDLoc DL(Op);
4327
4328 // If user has set the no alignment function attribute, ignore
4329 // alloca alignments.
4330 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4331
4332 uint64_t StackAlign = TFI->getStackAlignment();
4333 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4334 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4335
4336 SDValue NeededSpace = Size;
4337
4338 // Add extra space for alignment if needed.
4339 EVT PtrVT = getPointerTy(MF.getDataLayout());
4340 if (ExtraAlignSpace)
4341 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
4342 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4343
4344 bool IsSigned = false;
4345 bool DoesNotReturn = false;
4346 bool IsReturnValueUsed = false;
4347 EVT VT = Op.getValueType();
4348 SDValue AllocaCall =
4349 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4350 CallingConv::C, IsSigned, DL, DoesNotReturn,
4351 IsReturnValueUsed)
4352 .first;
4353
4354 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4355 // to end of call in order to ensure it isn't broken up from the call
4356 // sequence.
4357 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4358 Register SPReg = Regs.getStackPointerRegister();
4359 Chain = AllocaCall.getValue(1);
4360 SDValue Glue = AllocaCall.getValue(2);
4361 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4362 Chain = NewSPRegNode.getValue(1);
4363
4364 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4365 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4366 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4367
4368 // Dynamically realign if needed.
4369 if (ExtraAlignSpace) {
4370 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4371 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4372 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4373 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4374 }
4375
4376 SDValue Ops[2] = {Result, Chain};
4377 return DAG.getMergeValues(Ops, DL);
4378}
4379
4380SDValue
4381SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4382 SelectionDAG &DAG) const {
4383 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4384 MachineFunction &MF = DAG.getMachineFunction();
4385 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4386 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4387
4388 SDValue Chain = Op.getOperand(0);
4389 SDValue Size = Op.getOperand(1);
4390 SDValue Align = Op.getOperand(2);
4391 SDLoc DL(Op);
4392
4393 // If user has set the no alignment function attribute, ignore
4394 // alloca alignments.
4395 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4396
4397 uint64_t StackAlign = TFI->getStackAlignment();
4398 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4399 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4400
4401 Register SPReg = getStackPointerRegisterToSaveRestore();
4402 SDValue NeededSpace = Size;
4403
4404 // Get a reference to the stack pointer.
4405 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4406
4407 // If we need a backchain, save it now.
4408 SDValue Backchain;
4409 if (StoreBackchain)
4410 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4411 MachinePointerInfo());
4412
4413 // Add extra space for alignment if needed.
4414 if (ExtraAlignSpace)
4415 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4416 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4417
4418 // Get the new stack pointer value.
4419 SDValue NewSP;
4420 if (hasInlineStackProbe(MF)) {
4421 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4422 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4423 Chain = NewSP.getValue(1);
4424 }
4425 else {
4426 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4427 // Copy the new stack pointer back.
4428 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4429 }
4430
4431 // The allocated data lives above the 160 bytes allocated for the standard
4432 // frame, plus any outgoing stack arguments. We don't know how much that
4433 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4434 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4435 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4436
4437 // Dynamically realign if needed.
4438 if (RequiredAlign > StackAlign) {
4439 Result =
4440 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4441 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4442 Result =
4443 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4444 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4445 }
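  // For example, with an 8-byte stack alignment and an alloca requiring
  // 64-byte alignment, 56 extra bytes are allocated above and the result
  // pointer is rounded up here to the next 64-byte boundary.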
4446
4447 if (StoreBackchain)
4448 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4449 MachinePointerInfo());
4450
4451 SDValue Ops[2] = { Result, Chain };
4452 return DAG.getMergeValues(Ops, DL);
4453}
4454
4455SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4456 SDValue Op, SelectionDAG &DAG) const {
4457 SDLoc DL(Op);
4458
4459 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4460}
4461
4462SDValue SystemZTargetLowering::lowerMULH(SDValue Op,
4463 SelectionDAG &DAG,
4464 unsigned Opcode) const {
4465 EVT VT = Op.getValueType();
4466 SDLoc DL(Op);
4467 SDValue Even, Odd;
4468
4469 // This custom expander is only used on arch15 and later for 64-bit types.
4470 assert(!is32Bit(VT));
4471 assert(Subtarget.hasMiscellaneousExtensions2());
4472
4473 // SystemZISD::xMUL_LOHI returns the low result in the odd register and
4474 // the high result in the even register. Return the latter.
4475 lowerGR128Binary(DAG, DL, VT, Opcode,
4476 Op.getOperand(0), Op.getOperand(1), Even, Odd);
4477 return Even;
4478}
4479
4480SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4481 SelectionDAG &DAG) const {
4482 EVT VT = Op.getValueType();
4483 SDLoc DL(Op);
4484 SDValue Ops[2];
4485 if (is32Bit(VT))
4486 // Just do a normal 64-bit multiplication and extract the results.
4487 // We define this so that it can be used for constant division.
4488 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4489 Op.getOperand(1), Ops[1], Ops[0]);
4490 else if (Subtarget.hasMiscellaneousExtensions2())
4491 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4492 // the high result in the even register. ISD::SMUL_LOHI is defined to
4493 // return the low half first, so the results are in reverse order.
4494 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4495 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4496 else {
4497 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4498 //
4499 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4500 //
4501 // but using the fact that the upper halves are either all zeros
4502 // or all ones:
4503 //
4504 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4505 //
4506 // and grouping the right terms together since they are quicker than the
4507 // multiplication:
4508 //
4509 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
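    // (Here lh = ll >> 63 and rh = rl >> 63 are the sign-extended upper
    // halves, so each is either 0 or all ones. If lh is all ones then
    // lh * rl = -rl and lh & rl = rl, hence lh * rl = -(lh & rl); the same
    // argument applies to ll * rh, which gives the subtracted term above.)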
4510 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4511 SDValue LL = Op.getOperand(0);
4512 SDValue RL = Op.getOperand(1);
4513 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4514 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4515 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4516 // the high result in the even register. ISD::SMUL_LOHI is defined to
4517 // return the low half first, so the results are in reverse order.
4518 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4519 LL, RL, Ops[1], Ops[0]);
4520 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4521 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4522 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4523 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4524 }
4525 return DAG.getMergeValues(Ops, DL);
4526}
4527
4528SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4529 SelectionDAG &DAG) const {
4530 EVT VT = Op.getValueType();
4531 SDLoc DL(Op);
4532 SDValue Ops[2];
4533 if (is32Bit(VT))
4534 // Just do a normal 64-bit multiplication and extract the results.
4535 // We define this so that it can be used for constant division.
4536 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4537 Op.getOperand(1), Ops[1], Ops[0]);
4538 else
4539 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4540 // the high result in the even register. ISD::UMUL_LOHI is defined to
4541 // return the low half first, so the results are in reverse order.
4542 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4543 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4544 return DAG.getMergeValues(Ops, DL);
4545}
4546
4547SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4548 SelectionDAG &DAG) const {
4549 SDValue Op0 = Op.getOperand(0);
4550 SDValue Op1 = Op.getOperand(1);
4551 EVT VT = Op.getValueType();
4552 SDLoc DL(Op);
4553
4554 // We use DSGF for 32-bit division. This means the first operand must
4555 // always be 64-bit, and the second operand should be 32-bit whenever
4556 // that is possible, to improve performance.
4557 if (is32Bit(VT))
4558 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4559 else if (DAG.ComputeNumSignBits(Op1) > 32)
4560 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4561
4562 // DSG(F) returns the remainder in the even register and the
4563 // quotient in the odd register.
4564 SDValue Ops[2];
4565 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4566 return DAG.getMergeValues(Ops, DL);
4567}
4568
4569SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4570 SelectionDAG &DAG) const {
4571 EVT VT = Op.getValueType();
4572 SDLoc DL(Op);
4573
4574 // DL(G) returns the remainder in the even register and the
4575 // quotient in the odd register.
4576 SDValue Ops[2];
4577 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4578 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4579 return DAG.getMergeValues(Ops, DL);
4580}
4581
4582SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4583 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4584
4585 // Get the known-zero masks for each operand.
4586 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4587 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4588 DAG.computeKnownBits(Ops[1])};
4589
4590 // See if the upper 32 bits of one operand and the lower 32 bits of the
4591 // other are known zero. They are the low and high operands respectively.
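  // For example, if the low 32 bits of one operand and the high 32 bits of
  // the other are known zero, 0x1234567800000000 | 0x00000000abcdef01 is
  // simply 0x12345678abcdef01, i.e. an insertion of Low's low word into High.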
4592 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4593 Known[1].Zero.getZExtValue() };
4594 unsigned High, Low;
4595 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4596 High = 1, Low = 0;
4597 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4598 High = 0, Low = 1;
4599 else
4600 return Op;
4601
4602 SDValue LowOp = Ops[Low];
4603 SDValue HighOp = Ops[High];
4604
4605 // If the high part is a constant, we're better off using IILH.
4606 if (HighOp.getOpcode() == ISD::Constant)
4607 return Op;
4608
4609 // If the low part is a constant that is outside the range of LHI,
4610 // then we're better off using IILF.
4611 if (LowOp.getOpcode() == ISD::Constant) {
4612 int64_t Value = int32_t(LowOp->getAsZExtVal());
4613 if (!isInt<16>(Value))
4614 return Op;
4615 }
4616
4617 // Check whether the high part is an AND that doesn't change the
4618 // high 32 bits and just masks out low bits. We can skip it if so.
4619 if (HighOp.getOpcode() == ISD::AND &&
4620 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4621 SDValue HighOp0 = HighOp.getOperand(0);
4622 uint64_t Mask = HighOp.getConstantOperandVal(1);
4623 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4624 HighOp = HighOp0;
4625 }
4626
4627 // Take advantage of the fact that all GR32 operations only change the
4628 // low 32 bits by truncating Low to an i32 and inserting it directly
4629 // using a subreg. The interesting cases are those where the truncation
4630 // can be folded.
4631 SDLoc DL(Op);
4632 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4633 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4634 MVT::i64, HighOp, Low32);
4635}
4636
4637// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4638SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4639 SelectionDAG &DAG) const {
4640 SDNode *N = Op.getNode();
4641 SDValue LHS = N->getOperand(0);
4642 SDValue RHS = N->getOperand(1);
4643 SDLoc DL(N);
4644
4645 if (N->getValueType(0) == MVT::i128) {
4646 unsigned BaseOp = 0;
4647 unsigned FlagOp = 0;
4648 bool IsBorrow = false;
4649 switch (Op.getOpcode()) {
4650 default: llvm_unreachable("Unknown instruction!");
4651 case ISD::UADDO:
4652 BaseOp = ISD::ADD;
4653 FlagOp = SystemZISD::VACC;
4654 break;
4655 case ISD::USUBO:
4656 BaseOp = ISD::SUB;
4657 FlagOp = SystemZISD::VSCBI;
4658 IsBorrow = true;
4659 break;
4660 }
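    // VSCBI reports the borrow indication as 1 when no borrow occurs, so for
    // USUBO the flag computed below is inverted to match the ISD convention
    // of 1 meaning a borrow happened.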
4661 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4662 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4663 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4664 DAG.getValueType(MVT::i1));
4665 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4666 if (IsBorrow)
4667 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4668 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4669 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4670 }
4671
4672 unsigned BaseOp = 0;
4673 unsigned CCValid = 0;
4674 unsigned CCMask = 0;
4675
4676 switch (Op.getOpcode()) {
4677 default: llvm_unreachable("Unknown instruction!");
4678 case ISD::SADDO:
4679 BaseOp = SystemZISD::SADDO;
4680 CCValid = SystemZ::CCMASK_ARITH;
4681 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4682 break;
4683 case ISD::SSUBO:
4684 BaseOp = SystemZISD::SSUBO;
4685 CCValid = SystemZ::CCMASK_ARITH;
4686 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4687 break;
4688 case ISD::UADDO:
4689 BaseOp = SystemZISD::UADDO;
4690 CCValid = SystemZ::CCMASK_LOGICAL;
4691 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4692 break;
4693 case ISD::USUBO:
4694 BaseOp = SystemZISD::USUBO;
4695 CCValid = SystemZ::CCMASK_LOGICAL;
4696 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4697 break;
4698 }
4699
4700 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4701 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4702
4703 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4704 if (N->getValueType(1) == MVT::i1)
4705 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4706
4707 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4708}
4709
4710static bool isAddCarryChain(SDValue Carry) {
4711 while (Carry.getOpcode() == ISD::UADDO_CARRY)
4712 Carry = Carry.getOperand(2);
4713 return Carry.getOpcode() == ISD::UADDO;
4714}
4715
4716static bool isSubBorrowChain(SDValue Carry) {
4717 while (Carry.getOpcode() == ISD::USUBO_CARRY)
4718 Carry = Carry.getOperand(2);
4719 return Carry.getOpcode() == ISD::USUBO;
4720}
4721
4722// Lower UADDO_CARRY/USUBO_CARRY nodes.
4723SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4724 SelectionDAG &DAG) const {
4725
4726 SDNode *N = Op.getNode();
4727 MVT VT = N->getSimpleValueType(0);
4728
4729 // Let legalize expand this if it isn't a legal type yet.
4730 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4731 return SDValue();
4732
4733 SDValue LHS = N->getOperand(0);
4734 SDValue RHS = N->getOperand(1);
4735 SDValue Carry = Op.getOperand(2);
4736 SDLoc DL(N);
4737
4738 if (VT == MVT::i128) {
4739 unsigned BaseOp = 0;
4740 unsigned FlagOp = 0;
4741 bool IsBorrow = false;
4742 switch (Op.getOpcode()) {
4743 default: llvm_unreachable("Unknown instruction!");
4744 case ISD::UADDO_CARRY:
4745 BaseOp = SystemZISD::VAC;
4746 FlagOp = SystemZISD::VACCC;
4747 break;
4748 case ISD::USUBO_CARRY:
4749 BaseOp = SystemZISD::VSBI;
4750 FlagOp = SystemZISD::VSBCBI;
4751 IsBorrow = true;
4752 break;
4753 }
4754 if (IsBorrow)
4755 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4756 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4757 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4758 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4759 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4760 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4761 DAG.getValueType(MVT::i1));
4762 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4763 if (IsBorrow)
4764 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4765 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4766 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4767 }
4768
4769 unsigned BaseOp = 0;
4770 unsigned CCValid = 0;
4771 unsigned CCMask = 0;
4772
4773 switch (Op.getOpcode()) {
4774 default: llvm_unreachable("Unknown instruction!");
4775 case ISD::UADDO_CARRY:
4776 if (!isAddCarryChain(Carry))
4777 return SDValue();
4778
4779 BaseOp = SystemZISD::ADDCARRY;
4780 CCValid = SystemZ::CCMASK_LOGICAL;
4781 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4782 break;
4783 case ISD::USUBO_CARRY:
4784 if (!isSubBorrowChain(Carry))
4785 return SDValue();
4786
4787 BaseOp = SystemZISD::SUBCARRY;
4788 CCValid = SystemZ::CCMASK_LOGICAL;
4789 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4790 break;
4791 }
4792
4793 // Set the condition code from the carry flag.
4794 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4795 DAG.getConstant(CCValid, DL, MVT::i32),
4796 DAG.getConstant(CCMask, DL, MVT::i32));
4797
4798 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4799 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4800
4801 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4802 if (N->getValueType(1) == MVT::i1)
4803 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4804
4805 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4806}
4807
4808SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4809 SelectionDAG &DAG) const {
4810 EVT VT = Op.getValueType();
4811 SDLoc DL(Op);
4812 Op = Op.getOperand(0);
4813
4814 if (VT.getScalarSizeInBits() == 128) {
4815 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4816 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4817 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4818 DAG.getConstant(0, DL, MVT::i64));
4819 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4820 return Op;
4821 }
4822
4823 // Handle vector types via VPOPCT.
4824 if (VT.isVector()) {
4825 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4826 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4827 switch (VT.getScalarSizeInBits()) {
4828 case 8:
4829 break;
4830 case 16: {
4831 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4832 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4833 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4834 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4835 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4836 break;
4837 }
4838 case 32: {
4839 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4840 DAG.getConstant(0, DL, MVT::i32));
4841 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4842 break;
4843 }
4844 case 64: {
4845 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4846 DAG.getConstant(0, DL, MVT::i32));
4847 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4848 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4849 break;
4850 }
4851 default:
4852 llvm_unreachable("Unexpected type");
4853 }
4854 return Op;
4855 }
4856
4857 // Get the known-zero mask for the operand.
4858 KnownBits Known = DAG.computeKnownBits(Op);
4859 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
4860 if (NumSignificantBits == 0)
4861 return DAG.getConstant(0, DL, VT);
4862
4863 // Skip known-zero high parts of the operand.
4864 int64_t OrigBitSize = VT.getSizeInBits();
4865 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
4866 BitSize = std::min(BitSize, OrigBitSize);
4867
4868 // The POPCNT instruction counts the number of bits in each byte.
4869 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
4870 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
4871 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4872
4873 // Add up per-byte counts in a binary tree. All bits of Op at
4874 // position larger than BitSize remain zero throughout.
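  // Worked example for a 32-bit value with per-byte counts c3..c0 (c3 in the
  // most significant byte): after the I == 16 step the top two bytes hold
  // c3+c1 and c2+c0, after the I == 8 step the top byte holds c3+c2+c1+c0,
  // and the final SRL by BitSize - 8 extracts that byte as the result.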
4875 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
4876 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
4877 if (BitSize != OrigBitSize)
4878 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
4879 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
4880 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4881 }
4882
4883 // Extract overall result from high byte.
4884 if (BitSize > 8)
4885 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4886 DAG.getConstant(BitSize - 8, DL, VT));
4887
4888 return Op;
4889}
4890
4891SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4892 SelectionDAG &DAG) const {
4893 SDLoc DL(Op);
4894 AtomicOrdering FenceOrdering =
4895 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4896 SyncScope::ID FenceSSID =
4897 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4898
4899 // The only fence that needs an instruction is a sequentially-consistent
4900 // cross-thread fence.
4901 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4902 FenceSSID == SyncScope::System) {
4903 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
4904 Op.getOperand(0)),
4905 0);
4906 }
4907
4908 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4909 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4910}
4911
4912SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
4913 SelectionDAG &DAG) const {
4914 auto *Node = cast<AtomicSDNode>(Op.getNode());
4915 assert(
4916 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
4917 "Only custom lowering i128 or f128.");
4918 // Use same code to handle both legal and non-legal i128 types.
4919 SmallVector<SDValue, 2> Results;
4920 LowerOperationWrapper(Node, Results, DAG);
4921 return DAG.getMergeValues(Results, SDLoc(Op));
4922}
4923
4924// Prepare for a Compare And Swap for a subword operation. This needs to be
4925// done in memory with 4 bytes at natural alignment.
4926static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
4927 SDValue &AlignedAddr, SDValue &BitShift,
4928 SDValue &NegBitShift) {
4929 EVT PtrVT = Addr.getValueType();
4930 EVT WideVT = MVT::i32;
4931
4932 // Get the address of the containing word.
4933 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4934 DAG.getSignedConstant(-4, DL, PtrVT));
4935
4936 // Get the number of bits that the word must be rotated left in order
4937 // to bring the field to the top bits of a GR32.
4938 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4939 DAG.getConstant(3, DL, PtrVT));
4940 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4941
4942 // Get the complementing shift amount, for rotating a field in the top
4943 // bits back to its proper position.
4944 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4945 DAG.getConstant(0, DL, WideVT), BitShift);
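  // For example, a halfword at address 0x1006 gives AlignedAddr 0x1004 and
  // BitShift 0x8030; only the low bits of BitShift matter as a rotate
  // amount, so the field is effectively rotated left by 16 (mod 32) into
  // the top of the containing GR32.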
4946
4947}
4948
4949// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
4950// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
4951SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
4952 SelectionDAG &DAG,
4953 unsigned Opcode) const {
4954 auto *Node = cast<AtomicSDNode>(Op.getNode());
4955
4956 // 32-bit operations need no special handling.
4957 EVT NarrowVT = Node->getMemoryVT();
4958 EVT WideVT = MVT::i32;
4959 if (NarrowVT == WideVT)
4960 return Op;
4961
4962 int64_t BitSize = NarrowVT.getSizeInBits();
4963 SDValue ChainIn = Node->getChain();
4964 SDValue Addr = Node->getBasePtr();
4965 SDValue Src2 = Node->getVal();
4966 MachineMemOperand *MMO = Node->getMemOperand();
4967 SDLoc DL(Node);
4968
4969 // Convert atomic subtracts of constants into additions.
4970 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4971 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4972 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
4973 Src2 = DAG.getSignedConstant(-Const->getSExtValue(), DL,
4974 Src2.getValueType());
4975 }
4976
4977 SDValue AlignedAddr, BitShift, NegBitShift;
4978 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4979
4980 // Extend the source operand to 32 bits and prepare it for the inner loop.
4981 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
4982 // operations require the source to be shifted in advance. (This shift
4983 // can be folded if the source is constant.) For AND and NAND, the lower
4984 // bits must be set, while for other opcodes they should be left clear.
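  // For example, an i8 operand is shifted left by 24 so it lines up with the
  // rotated field at the top of the word; for AND/NAND the remaining low
  // 24 bits are ORed with ones so that the neighbouring bytes are left
  // unchanged inside the compare-and-swap loop.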
4985 if (Opcode != SystemZISD::ATOMIC_SWAPW)
4986 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
4987 DAG.getConstant(32 - BitSize, DL, WideVT));
4988 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
4989 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
4990 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
4991 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
4992
4993 // Construct the ATOMIC_LOADW_* node.
4994 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
4995 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4996 DAG.getConstant(BitSize, DL, WideVT) };
4997 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4998 NarrowVT, MMO);
4999
5000 // Rotate the result of the final CS so that the field is in the lower
5001 // bits of a GR32, then truncate it.
5002 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
5003 DAG.getConstant(BitSize, DL, WideVT));
5004 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
5005
5006 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
5007 return DAG.getMergeValues(RetOps, DL);
5008}
5009
5010// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
5011// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
5012SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
5013 SelectionDAG &DAG) const {
5014 auto *Node = cast<AtomicSDNode>(Op.getNode());
5015 EVT MemVT = Node->getMemoryVT();
5016 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
5017 // A full-width operation: negate and use LAA(G).
5018 assert(Op.getValueType() == MemVT && "Mismatched VTs");
5019 assert(Subtarget.hasInterlockedAccess1() &&
5020 "Should have been expanded by AtomicExpand pass.");
5021 SDValue Src2 = Node->getVal();
5022 SDLoc DL(Src2);
5023 SDValue NegSrc2 =
5024 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
5025 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
5026 Node->getChain(), Node->getBasePtr(), NegSrc2,
5027 Node->getMemOperand());
5028 }
5029
5030 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
5031}
5032
5033// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
5034SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
5035 SelectionDAG &DAG) const {
5036 auto *Node = cast<AtomicSDNode>(Op.getNode());
5037 SDValue ChainIn = Node->getOperand(0);
5038 SDValue Addr = Node->getOperand(1);
5039 SDValue CmpVal = Node->getOperand(2);
5040 SDValue SwapVal = Node->getOperand(3);
5041 MachineMemOperand *MMO = Node->getMemOperand();
5042 SDLoc DL(Node);
5043
5044 if (Node->getMemoryVT() == MVT::i128) {
5045 // Use same code to handle both legal and non-legal i128 types.
5046 SmallVector<SDValue, 3> Results;
5047 LowerOperationWrapper(Node, Results, DAG);
5048 return DAG.getMergeValues(Results, DL);
5049 }
5050
5051 // We have native support for 32-bit and 64-bit compare and swap, but we
5052 // still need to expand extracting the "success" result from the CC.
5053 EVT NarrowVT = Node->getMemoryVT();
5054 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
5055 if (NarrowVT == WideVT) {
5056 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5057 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
5058 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
5059 DL, Tys, Ops, NarrowVT, MMO);
5060 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5061 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
5062
5063 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
5064 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5065 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5066 return SDValue();
5067 }
5068
5069 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
5070 // via a fullword ATOMIC_CMP_SWAPW operation.
5071 int64_t BitSize = NarrowVT.getSizeInBits();
5072
5073 SDValue AlignedAddr, BitShift, NegBitShift;
5074 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5075
5076 // Construct the ATOMIC_CMP_SWAPW node.
5077 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5078 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
5079 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
5080 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
5081 VTList, Ops, NarrowVT, MMO);
5082 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5083 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
5084
5085 // emitAtomicCmpSwapW() will zero extend the result (original value).
5086 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
5087 DAG.getValueType(NarrowVT));
5088 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
5089 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5090 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5091 return SDValue();
5092}
5093
5094MachineMemOperand::Flags
5095SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
5096 // Because of how we convert atomic_load and atomic_store to normal loads and
5097 // stores in the DAG, we need to ensure that the MMOs are marked volatile
5098 // since DAGCombine hasn't been updated to account for atomic, but non
5099 // volatile loads. (See D57601)
5100 if (auto *SI = dyn_cast<StoreInst>(&I))
5101 if (SI->isAtomic())
5102 return MachineMemOperand::MOVolatile;
5103 if (auto *LI = dyn_cast<LoadInst>(&I))
5104 if (LI->isAtomic())
5105 return MachineMemOperand::MOVolatile;
5106 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
5107 if (AI->isAtomic())
5108 return MachineMemOperand::MOVolatile;
5109 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
5110 if (AI->isAtomic())
5111 return MachineMemOperand::MOVolatile;
5112 return MachineMemOperand::MONone;
5113}
5114
5115SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
5116 SelectionDAG &DAG) const {
5117 MachineFunction &MF = DAG.getMachineFunction();
5118 auto *Regs = Subtarget.getSpecialRegisters();
5119 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5120 report_fatal_error("Variable-sized stack allocations are not supported "
5121 "in GHC calling convention");
5122 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
5123 Regs->getStackPointerRegister(), Op.getValueType());
5124}
5125
5126SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
5127 SelectionDAG &DAG) const {
5128 MachineFunction &MF = DAG.getMachineFunction();
5129 auto *Regs = Subtarget.getSpecialRegisters();
5130 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
5131
5132 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5133 report_fatal_error("Variable-sized stack allocations are not supported "
5134 "in GHC calling convention");
5135
5136 SDValue Chain = Op.getOperand(0);
5137 SDValue NewSP = Op.getOperand(1);
5138 SDValue Backchain;
5139 SDLoc DL(Op);
5140
5141 if (StoreBackchain) {
5142 SDValue OldSP = DAG.getCopyFromReg(
5143 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
5144 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
5145 MachinePointerInfo());
5146 }
5147
5148 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
5149
5150 if (StoreBackchain)
5151 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
5152 MachinePointerInfo());
5153
5154 return Chain;
5155}
5156
5157SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
5158 SelectionDAG &DAG) const {
5159 bool IsData = Op.getConstantOperandVal(4);
5160 if (!IsData)
5161 // Just preserve the chain.
5162 return Op.getOperand(0);
5163
5164 SDLoc DL(Op);
5165 bool IsWrite = Op.getConstantOperandVal(2);
5166 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
5167 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
5168 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
5169 Op.getOperand(1)};
5170 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
5171 Node->getVTList(), Ops,
5172 Node->getMemoryVT(), Node->getMemOperand());
5173}
5174
5175// Convert condition code in CCReg to an i32 value.
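// (IPM places the condition code in bits 29:28 of its result, so the shift
// by SystemZ::IPM_CC below leaves the CC as an integer in the range 0-3.)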
5176static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
5177 SDLoc DL(CCReg);
5178 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
5179 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
5180 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
5181}
5182
5183SDValue
5184SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
5185 SelectionDAG &DAG) const {
5186 unsigned Opcode, CCValid;
5187 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
5188 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
5189 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
5190 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
5191 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
5192 return SDValue();
5193 }
5194
5195 return SDValue();
5196}
5197
5198SDValue
5199SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
5200 SelectionDAG &DAG) const {
5201 unsigned Opcode, CCValid;
5202 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
5203 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
5204 if (Op->getNumValues() == 1)
5205 return getCCResult(DAG, SDValue(Node, 0));
5206 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
5207 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
5208 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
5209 }
5210
5211 unsigned Id = Op.getConstantOperandVal(0);
5212 switch (Id) {
5213 case Intrinsic::thread_pointer:
5214 return lowerThreadPointer(SDLoc(Op), DAG);
5215
5216 case Intrinsic::s390_vpdi:
5217 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
5218 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5219
5220 case Intrinsic::s390_vperm:
5221 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
5222 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5223
5224 case Intrinsic::s390_vuphb:
5225 case Intrinsic::s390_vuphh:
5226 case Intrinsic::s390_vuphf:
5227 case Intrinsic::s390_vuphg:
5228 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
5229 Op.getOperand(1));
5230
5231 case Intrinsic::s390_vuplhb:
5232 case Intrinsic::s390_vuplhh:
5233 case Intrinsic::s390_vuplhf:
5234 case Intrinsic::s390_vuplhg:
5235 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
5236 Op.getOperand(1));
5237
5238 case Intrinsic::s390_vuplb:
5239 case Intrinsic::s390_vuplhw:
5240 case Intrinsic::s390_vuplf:
5241 case Intrinsic::s390_vuplg:
5242 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
5243 Op.getOperand(1));
5244
5245 case Intrinsic::s390_vupllb:
5246 case Intrinsic::s390_vupllh:
5247 case Intrinsic::s390_vupllf:
5248 case Intrinsic::s390_vupllg:
5249 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
5250 Op.getOperand(1));
5251
5252 case Intrinsic::s390_vsumb:
5253 case Intrinsic::s390_vsumh:
5254 case Intrinsic::s390_vsumgh:
5255 case Intrinsic::s390_vsumgf:
5256 case Intrinsic::s390_vsumqf:
5257 case Intrinsic::s390_vsumqg:
5258 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
5259 Op.getOperand(1), Op.getOperand(2));
5260
5261 case Intrinsic::s390_vaq:
5262 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5263 Op.getOperand(1), Op.getOperand(2));
5264 case Intrinsic::s390_vaccb:
5265 case Intrinsic::s390_vacch:
5266 case Intrinsic::s390_vaccf:
5267 case Intrinsic::s390_vaccg:
5268 case Intrinsic::s390_vaccq:
5269 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
5270 Op.getOperand(1), Op.getOperand(2));
5271 case Intrinsic::s390_vacq:
5272 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
5273 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5274 case Intrinsic::s390_vacccq:
5275 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
5276 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5277
5278 case Intrinsic::s390_vsq:
5279 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
5280 Op.getOperand(1), Op.getOperand(2));
5281 case Intrinsic::s390_vscbib:
5282 case Intrinsic::s390_vscbih:
5283 case Intrinsic::s390_vscbif:
5284 case Intrinsic::s390_vscbig:
5285 case Intrinsic::s390_vscbiq:
5286 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
5287 Op.getOperand(1), Op.getOperand(2));
5288 case Intrinsic::s390_vsbiq:
5289 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
5290 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5291 case Intrinsic::s390_vsbcbiq:
5292 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
5293 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5294 }
5295
5296 return SDValue();
5297}
5298
5299namespace {
5300// Says that SystemZISD operation Opcode can be used to perform the equivalent
5301// of a VPERM with permute vector Bytes. If Opcode takes three operands,
5302// Operand is the constant third operand, otherwise it is the number of
5303// bytes in each element of the result.
5304struct Permute {
5305 unsigned Opcode;
5306 unsigned Operand;
5307 unsigned char Bytes[SystemZ::VectorBytes];
5308};
5309}
5310
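// In the table below the 16 result bytes are given as VPERM-style selectors:
// values 0..15 pick bytes of the first operand and 16..31 pick bytes of the
// second. For example, the VMRHB pattern <0,16,1,17,...> interleaves the
// leftmost bytes of both operands. For the two-operand merges and packs,
// Operand gives the element size in bytes of the result, as described above.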
5311static const Permute PermuteForms[] = {
5312 // VMRHG
5313 { SystemZISD::MERGE_HIGH, 8,
5314 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
5315 // VMRHF
5316 { SystemZISD::MERGE_HIGH, 4,
5317 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
5318 // VMRHH
5319 { SystemZISD::MERGE_HIGH, 2,
5320 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
5321 // VMRHB
5322 { SystemZISD::MERGE_HIGH, 1,
5323 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
5324 // VMRLG
5325 { SystemZISD::MERGE_LOW, 8,
5326 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
5327 // VMRLF
5328 { SystemZISD::MERGE_LOW, 4,
5329 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
5330 // VMRLH
5331 { SystemZISD::MERGE_LOW, 2,
5332 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
5333 // VMRLB
5334 { SystemZISD::MERGE_LOW, 1,
5335 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
5336 // VPKG
5337 { SystemZISD::PACK, 4,
5338 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
5339 // VPKF
5340 { SystemZISD::PACK, 2,
5341 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
5342 // VPKH
5343 { SystemZISD::PACK, 1,
5344 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
5345 // VPDI V1, V2, 4 (low half of V1, high half of V2)
5346 { SystemZISD::PERMUTE_DWORDS, 4,
5347 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
5348 // VPDI V1, V2, 1 (high half of V1, low half of V2)
5349 { SystemZISD::PERMUTE_DWORDS, 1,
5350 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
5351};
5352
5353// Called after matching a vector shuffle against a particular pattern.
5354// Both the original shuffle and the pattern have two vector operands.
5355// OpNos[0] is the operand of the original shuffle that should be used for
5356// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
5357// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
5358// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
5359// for operands 0 and 1 of the pattern.
5360static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
5361 if (OpNos[0] < 0) {
5362 if (OpNos[1] < 0)
5363 return false;
5364 OpNo0 = OpNo1 = OpNos[1];
5365 } else if (OpNos[1] < 0) {
5366 OpNo0 = OpNo1 = OpNos[0];
5367 } else {
5368 OpNo0 = OpNos[0];
5369 OpNo1 = OpNos[1];
5370 }
5371 return true;
5372}
5373
5374// Bytes is a VPERM-like permute vector, except that -1 is used for
5375// undefined bytes. Return true if the VPERM can be implemented using P.
5376// When returning true set OpNo0 to the VPERM operand that should be
5377// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5378//
5379// For example, if swapping the VPERM operands allows P to match, OpNo0
5380// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5381// operand, but rewriting it to use two duplicated operands allows it to
5382// match P, then OpNo0 and OpNo1 will be the same.
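// OpNos[] below records, for each operand of the pattern, which shuffle
// operand it has been matched against so far (-1 while still unconstrained).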
5383static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5384 unsigned &OpNo0, unsigned &OpNo1) {
5385 int OpNos[] = { -1, -1 };
5386 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5387 int Elt = Bytes[I];
5388 if (Elt >= 0) {
5389 // Make sure that the two permute vectors use the same suboperand
5390 // byte number. Only the operand numbers (the high bits) are
5391 // allowed to differ.
5392 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5393 return false;
5394 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5395 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5396 // Make sure that the operand mappings are consistent with previous
5397 // elements.
5398 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5399 return false;
5400 OpNos[ModelOpNo] = RealOpNo;
5401 }
5402 }
5403 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5404}
5405
5406// As above, but search for a matching permute.
5407static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5408 unsigned &OpNo0, unsigned &OpNo1) {
5409 for (auto &P : PermuteForms)
5410 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5411 return &P;
5412 return nullptr;
5413}
5414
5415// Bytes is a VPERM-like permute vector, except that -1 is used for
5416// undefined bytes. This permute is an operand of an outer permute.
5417// See whether redistributing the -1 bytes gives a shuffle that can be
5418// implemented using P. If so, set Transform to a VPERM-like permute vector
5419// that, when applied to the result of P, gives the original permute in Bytes.
5420static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5421 const Permute &P,
5422 SmallVectorImpl<int> &Transform) {
5423 unsigned To = 0;
5424 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5425 int Elt = Bytes[From];
5426 if (Elt < 0)
5427 // Byte number From of the result is undefined.
5428 Transform[From] = -1;
5429 else {
5430 while (P.Bytes[To] != Elt) {
5431 To += 1;
5432 if (To == SystemZ::VectorBytes)
5433 return false;
5434 }
5435 Transform[From] = To;
5436 }
5437 }
5438 return true;
5439}
5440
5441// As above, but search for a matching permute.
5442static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5443 SmallVectorImpl<int> &Transform) {
5444 for (auto &P : PermuteForms)
5445 if (matchDoublePermute(Bytes, P, Transform))
5446 return &P;
5447 return nullptr;
5448}
5449
5450// Convert the mask of the given shuffle op into a byte-level mask,
5451// as if it had type vNi8.
5452static bool getVPermMask(SDValue ShuffleOp,
5453 SmallVectorImpl<int> &Bytes) {
5454 EVT VT = ShuffleOp.getValueType();
5455 unsigned NumElements = VT.getVectorNumElements();
5456 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5457
5458 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5459 Bytes.resize(NumElements * BytesPerElement, -1);
5460 for (unsigned I = 0; I < NumElements; ++I) {
5461 int Index = VSN->getMaskElt(I);
5462 if (Index >= 0)
5463 for (unsigned J = 0; J < BytesPerElement; ++J)
5464 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5465 }
5466 return true;
5467 }
5468 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5469 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5470 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5471 Bytes.resize(NumElements * BytesPerElement, -1);
5472 for (unsigned I = 0; I < NumElements; ++I)
5473 for (unsigned J = 0; J < BytesPerElement; ++J)
5474 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5475 return true;
5476 }
5477 return false;
5478}
5479
5480// Bytes is a VPERM-like permute vector, except that -1 is used for
5481// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5482// the result come from a contiguous sequence of bytes from one input.
5483// Set Base to the selector for the first byte if so.
5484static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5485 unsigned BytesPerElement, int &Base) {
5486 Base = -1;
5487 for (unsigned I = 0; I < BytesPerElement; ++I) {
5488 if (Bytes[Start + I] >= 0) {
5489 unsigned Elem = Bytes[Start + I];
5490 if (Base < 0) {
5491 Base = Elem - I;
5492 // Make sure the bytes would come from one input operand.
5493 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5494 return false;
5495 } else if (unsigned(Base) != Elem - I)
5496 return false;
5497 }
5498 }
5499 return true;
5500}
5501
5502// Bytes is a VPERM-like permute vector, except that -1 is used for
5503// undefined bytes. Return true if it can be performed using VSLDB.
5504// When returning true, set StartIndex to the shift amount and OpNo0
5505// and OpNo1 to the VPERM operands that should be used as the first
5506// and second shift operand respectively.
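// (VSLDB concatenates its two operands and extracts 16 consecutive bytes
// starting at the shift amount, so every defined byte must imply the same
// shift value.)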
5507static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5508 unsigned &StartIndex, unsigned &OpNo0,
5509 unsigned &OpNo1) {
5510 int OpNos[] = { -1, -1 };
5511 int Shift = -1;
5512 for (unsigned I = 0; I < 16; ++I) {
5513 int Index = Bytes[I];
5514 if (Index >= 0) {
5515 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5516 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5517 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5518 if (Shift < 0)
5519 Shift = ExpectedShift;
5520 else if (Shift != ExpectedShift)
5521 return false;
5522 // Make sure that the operand mappings are consistent with previous
5523 // elements.
5524 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5525 return false;
5526 OpNos[ModelOpNo] = RealOpNo;
5527 }
5528 }
5529 StartIndex = Shift;
5530 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5531}
5532
5533// Create a node that performs P on operands Op0 and Op1, casting the
5534// operands to the appropriate type. The type of the result is determined by P.
5535static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5536 const Permute &P, SDValue Op0, SDValue Op1) {
5537 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5538 // elements of a PACK are twice as wide as the outputs.
5539 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5540 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5541 P.Operand);
5542 // Cast both operands to the appropriate type.
5543 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5544 SystemZ::VectorBytes / InBytes);
5545 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5546 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5547 SDValue Op;
5548 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5549 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5550 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5551 } else if (P.Opcode == SystemZISD::PACK) {
5552 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5553 SystemZ::VectorBytes / P.Operand);
5554 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5555 } else {
5556 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5557 }
5558 return Op;
5559}
5560
5561static bool isZeroVector(SDValue N) {
5562 if (N->getOpcode() == ISD::BITCAST)
5563 N = N->getOperand(0);
5564 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5565 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5566 return Op->getZExtValue() == 0;
5567 return ISD::isBuildVectorAllZeros(N.getNode());
5568}
5569
5570// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5571static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5572 for (unsigned I = 0; I < Num ; I++)
5573 if (isZeroVector(Ops[I]))
5574 return I;
5575 return UINT32_MAX;
5576}
5577
5578// Bytes is a VPERM-like permute vector, except that -1 is used for
5579// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5580// VSLDB or VPERM.
5581static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5582 SDValue *Ops,
5583 const SmallVectorImpl<int> &Bytes) {
5584 for (unsigned I = 0; I < 2; ++I)
5585 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5586
5587 // First see whether VSLDB can be used.
5588 unsigned StartIndex, OpNo0, OpNo1;
5589 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5590 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5591 Ops[OpNo1],
5592 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5593
5594 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5595 // eliminate a zero vector by reusing any zero index in the permute vector.
5596 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5597 if (ZeroVecIdx != UINT32_MAX) {
5598 bool MaskFirst = true;
5599 int ZeroIdx = -1;
5600 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5601 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5602 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5603 if (OpNo == ZeroVecIdx && I == 0) {
5604 // If the first byte is zero, use mask as first operand.
5605 ZeroIdx = 0;
5606 break;
5607 }
5608 if (OpNo != ZeroVecIdx && Byte == 0) {
5609 // If mask contains a zero, use it by placing that vector first.
5610 ZeroIdx = I + SystemZ::VectorBytes;
5611 MaskFirst = false;
5612 break;
5613 }
5614 }
5615 if (ZeroIdx != -1) {
5616 SDValue IndexNodes[SystemZ::VectorBytes];
5617 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5618 if (Bytes[I] >= 0) {
5619 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5620 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5621 if (OpNo == ZeroVecIdx)
5622 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5623 else {
5624 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5625 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5626 }
5627 } else
5628 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5629 }
5630 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5631 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5632 if (MaskFirst)
5633 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5634 Mask);
5635 else
5636 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5637 Mask);
5638 }
5639 }
5640
5641 SDValue IndexNodes[SystemZ::VectorBytes];
5642 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5643 if (Bytes[I] >= 0)
5644 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5645 else
5646 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5647 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5648 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5649 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5650}
5651
5652namespace {
5653// Describes a general N-operand vector shuffle.
5654struct GeneralShuffle {
5655 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
5656 void addUndef();
5657 bool add(SDValue, unsigned);
5658 SDValue getNode(SelectionDAG &, const SDLoc &);
5659 void tryPrepareForUnpack();
5660 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5661 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5662
5663 // The operands of the shuffle.
5664 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5665
5666 // Index I is -1 if byte I of the result is undefined. Otherwise the
5667 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5668 // Bytes[I] / SystemZ::VectorBytes.
5669 SmallVector<int, SystemZ::VectorBytes> Bytes;
5670
5671 // The type of the shuffle result.
5672 EVT VT;
5673
5674 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5675 unsigned UnpackFromEltSize;
5676};
5677}
5678
5679// Add an extra undefined element to the shuffle.
5680void GeneralShuffle::addUndef() {
5681 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5682 for (unsigned I = 0; I < BytesPerElement; ++I)
5683 Bytes.push_back(-1);
5684}
5685
5686// Add an extra element to the shuffle, taking it from element Elem of Op.
5687// A null Op indicates a vector input whose value will be calculated later;
5688// there is at most one such input per shuffle and it always has the same
5689// type as the result. Aborts and returns false if the source vector elements
5690// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
5691// LLVM they become implicitly extended, but this is rare and not optimized.
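// For example, when the source and result element sizes match, adding
// element 3 of a v4i32 operand appends selectors for bytes 12..15 of that
// operand to Bytes.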
5692bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5693 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5694
5695 // The source vector can have wider elements than the result,
5696 // either through an explicit TRUNCATE or because of type legalization.
5697 // We want the least significant part.
5698 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5699 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5700
5701 // Return false if the source elements are smaller than their destination
5702 // elements.
5703 if (FromBytesPerElement < BytesPerElement)
5704 return false;
5705
5706 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5707 (FromBytesPerElement - BytesPerElement));
5708
5709 // Look through things like shuffles and bitcasts.
5710 while (Op.getNode()) {
5711 if (Op.getOpcode() == ISD::BITCAST)
5712 Op = Op.getOperand(0);
5713 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5714 // See whether the bytes we need come from a contiguous part of one
5715 // operand.
5716 SmallVector<int, SystemZ::VectorBytes> OpBytes;
5717 if (!getVPermMask(Op, OpBytes))
5718 break;
5719 int NewByte;
5720 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5721 break;
5722 if (NewByte < 0) {
5723 addUndef();
5724 return true;
5725 }
5726 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
5727 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
5728 } else if (Op.isUndef()) {
5729 addUndef();
5730 return true;
5731 } else
5732 break;
5733 }
5734
5735 // Make sure that the source of the extraction is in Ops.
5736 unsigned OpNo = 0;
5737 for (; OpNo < Ops.size(); ++OpNo)
5738 if (Ops[OpNo] == Op)
5739 break;
5740 if (OpNo == Ops.size())
5741 Ops.push_back(Op);
5742
5743 // Add the element to Bytes.
5744 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
5745 for (unsigned I = 0; I < BytesPerElement; ++I)
5746 Bytes.push_back(Base + I);
5747
5748 return true;
5749}
5750
5751// Return SDNodes for the completed shuffle.
5752SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
5753 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
5754
5755 if (Ops.size() == 0)
5756 return DAG.getUNDEF(VT);
5757
5758 // Use a single unpack if possible as the last operation.
5759 tryPrepareForUnpack();
5760
5761 // Make sure that there are at least two shuffle operands.
5762 if (Ops.size() == 1)
5763 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
5764
5765 // Create a tree of shuffles, deferring root node until after the loop.
5766 // Try to redistribute the undefined elements of non-root nodes so that
5767 // the non-root shuffles match something like a pack or merge, then adjust
5768 // the parent node's permute vector to compensate for the new order.
5769 // Among other things, this copes with vectors like <2 x i16> that were
5770 // padded with undefined elements during type legalization.
5771 //
5772 // In the best case this redistribution will lead to the whole tree
5773 // using packs and merges. It should rarely be a loss in other cases.
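// Each pass of the outer loop halves the number of live operands: Ops[I]
// is overwritten with the combination of Ops[I] and Ops[I + Stride], and
// Bytes is rewritten to select from the combined value.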
5774 unsigned Stride = 1;
5775 for (; Stride * 2 < Ops.size(); Stride *= 2) {
5776 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
5777 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
5778
5779 // Create a mask for just these two operands.
5780 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
5781 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5782 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
5783 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
5784 if (OpNo == I)
5785 NewBytes[J] = Byte;
5786 else if (OpNo == I + Stride)
5787 NewBytes[J] = SystemZ::VectorBytes + Byte;
5788 else
5789 NewBytes[J] = -1;
5790 }
5791 // See if it would be better to reorganize NewBytes to avoid using VPERM.
5792 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
5793 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
5794 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
5795 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
5796 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5797 if (NewBytes[J] >= 0) {
5798 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
5799 "Invalid double permute");
5800 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
5801 } else
5802 assert(NewBytesMap[J] < 0 && "Invalid double permute");
5803 }
5804 } else {
5805 // Just use NewBytes on the operands.
5806 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
5807 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
5808 if (NewBytes[J] >= 0)
5809 Bytes[J] = I * SystemZ::VectorBytes + J;
5810 }
5811 }
5812 }
5813
5814 // Now we just have 2 inputs. Put the second operand in Ops[1].
5815 if (Stride > 1) {
5816 Ops[1] = Ops[Stride];
5817 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5818 if (Bytes[I] >= int(SystemZ::VectorBytes))
5819 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
5820 }
5821
5822 // Look for an instruction that can do the permute without resorting
5823 // to VPERM.
5824 unsigned OpNo0, OpNo1;
5825 SDValue Op;
5826 if (unpackWasPrepared() && Ops[1].isUndef())
5827 Op = Ops[0];
5828 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
5829 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
5830 else
5831 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
5832
5833 Op = insertUnpackIfPrepared(DAG, DL, Op);
5834
5835 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5836}
5837
5838#ifndef NDEBUG
5839static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
5840 dbgs() << Msg.c_str() << " { ";
5841 for (unsigned i = 0; i < Bytes.size(); i++)
5842 dbgs() << Bytes[i] << " ";
5843 dbgs() << "}\n";
5844}
5845#endif
5846
5847// If the Bytes vector matches an unpack operation, prepare to do the unpack
5848// after all else by removing the zero vector and the effect of the unpack on
5849// Bytes.
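// For example, with UnpackFromEltSize == 2 the Bytes vector must look like
// <z,z,b0,b1, z,z,b2,b3, ...> with the z bytes taken from the zero vector;
// that is what a final zero-extending unpack of the high half produces from
// bytes b0..b7 of the remaining operand.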
5850void GeneralShuffle::tryPrepareForUnpack() {
5851 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
5852 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
5853 return;
5854
5855 // Only do this if removing the zero vector reduces the depth, otherwise
5856 // the critical path will increase with the final unpack.
5857 if (Ops.size() > 2 &&
5858 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
5859 return;
5860
5861 // Find an unpack that would allow removing the zero vector from Ops.
5862 UnpackFromEltSize = 1;
5863 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
5864 bool MatchUnpack = true;
5865 SmallVector<int, 16> SrcBytes;
5866 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
5867 unsigned ToEltSize = UnpackFromEltSize * 2;
5868 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
5869 if (!IsZextByte)
5870 SrcBytes.push_back(Bytes[Elt]);
5871 if (Bytes[Elt] != -1) {
5872 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
5873 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
5874 MatchUnpack = false;
5875 break;
5876 }
5877 }
5878 }
5879 if (MatchUnpack) {
5880 if (Ops.size() == 2) {
5881 // Don't use unpack if a single source operand needs rearrangement.
5882 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
5883 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
5884 UnpackFromEltSize = UINT_MAX;
5885 return;
5886 }
5887 }
5888 break;
5889 }
5890 }
5891 if (UnpackFromEltSize > 4)
5892 return;
5893
5894 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
5895 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
5896 << ".\n";
5897 dumpBytes(Bytes, "Original Bytes vector:"););
5898
5899 // Apply the unpack in reverse to the Bytes array.
5900 unsigned B = 0;
5901 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
5902 Elt += UnpackFromEltSize;
5903 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
5904 Bytes[B] = Bytes[Elt];
5905 }
5906 while (B < SystemZ::VectorBytes)
5907 Bytes[B++] = -1;
5908
5909 // Remove the zero vector from Ops
5910 Ops.erase(&Ops[ZeroVecOpNo]);
5911 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5912 if (Bytes[I] >= 0) {
5913 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5914 if (OpNo > ZeroVecOpNo)
5915 Bytes[I] -= SystemZ::VectorBytes;
5916 }
5917
5918 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
5919 dbgs() << "\n";);
5920}
5921
5922SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
5923 const SDLoc &DL,
5924 SDValue Op) {
5925 if (!unpackWasPrepared())
5926 return Op;
5927 unsigned InBits = UnpackFromEltSize * 8;
5928 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
5929 SystemZ::VectorBits / InBits);
5930 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
5931 unsigned OutBits = InBits * 2;
5932 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
5933 SystemZ::VectorBits / OutBits);
5934 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
5935}
5936
5937// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
5938static bool isScalarToVector(SDValue Op) {
5939 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
5940 if (!Op.getOperand(I).isUndef())
5941 return false;
5942 return true;
5943}
5944
5945// Return a vector of type VT that contains Value in the first element.
5946// The other elements don't matter.
5947static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5948 SDValue Value) {
5949 // If we have a constant, replicate it to all elements and let the
5950 // BUILD_VECTOR lowering take care of it.
5951 if (Value.getOpcode() == ISD::Constant ||
5952 Value.getOpcode() == ISD::ConstantFP) {
5953 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
5954 return DAG.getBuildVector(VT, DL, Ops);
5955 }
5956 if (Value.isUndef())
5957 return DAG.getUNDEF(VT);
5958 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
5959}
5960
5961// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
5962// element 1. Used for cases in which replication is cheap.
5963static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5964 SDValue Op0, SDValue Op1) {
5965 if (Op0.isUndef()) {
5966 if (Op1.isUndef())
5967 return DAG.getUNDEF(VT);
5968 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
5969 }
5970 if (Op1.isUndef())
5971 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
5972 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
5973 buildScalarToVector(DAG, DL, VT, Op0),
5974 buildScalarToVector(DAG, DL, VT, Op1));
5975}
5976
5977// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
5978// vector for them.
5979static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
5980 SDValue Op1) {
5981 if (Op0.isUndef() && Op1.isUndef())
5982 return DAG.getUNDEF(MVT::v2i64);
5983 // If one of the two inputs is undefined then replicate the other one,
5984 // in order to avoid using another register unnecessarily.
5985 if (Op0.isUndef())
5986 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5987 else if (Op1.isUndef())
5988 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5989 else {
5990 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5991 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5992 }
5993 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
5994}
5995
5996// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
5997// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
5998// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
5999// would benefit from this representation and return it if so.
6000static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
6001 BuildVectorSDNode *BVN) {
6002 EVT VT = BVN->getValueType(0);
6003 unsigned NumElements = VT.getVectorNumElements();
6004
6005 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
6006 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
6007 // need a BUILD_VECTOR, add an additional placeholder operand for that
6008 // BUILD_VECTOR and store its operands in ResidueOps.
6009 GeneralShuffle GS(VT);
6010 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
6011 bool FoundOne = false;
6012 for (unsigned I = 0; I < NumElements; ++I) {
6013 SDValue Op = BVN->getOperand(I);
6014 if (Op.getOpcode() == ISD::TRUNCATE)
6015 Op = Op.getOperand(0);
6016 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6017 Op.getOperand(1).getOpcode() == ISD::Constant) {
6018 unsigned Elem = Op.getConstantOperandVal(1);
6019 if (!GS.add(Op.getOperand(0), Elem))
6020 return SDValue();
6021 FoundOne = true;
6022 } else if (Op.isUndef()) {
6023 GS.addUndef();
6024 } else {
6025 if (!GS.add(SDValue(), ResidueOps.size()))
6026 return SDValue();
6027 ResidueOps.push_back(BVN->getOperand(I));
6028 }
6029 }
6030
6031 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
6032 if (!FoundOne)
6033 return SDValue();
6034
6035 // Create the BUILD_VECTOR for the remaining elements, if any.
6036 if (!ResidueOps.empty()) {
6037 while (ResidueOps.size() < NumElements)
6038 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
6039 for (auto &Op : GS.Ops) {
6040 if (!Op.getNode()) {
6041 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
6042 break;
6043 }
6044 }
6045 }
6046 return GS.getNode(DAG, SDLoc(BVN));
6047}
6048
6049bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
6050 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
6051 return true;
6052 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
6053 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
6054 return true;
6055 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
6056 return true;
6057 return false;
6058}
6059
6060// Combine GPR scalar values Elems into a vector of type VT.
6061SDValue
6062SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6063 SmallVectorImpl<SDValue> &Elems) const {
6064 // See whether there is a single replicated value.
6065 SDValue Single;
6066 unsigned int NumElements = Elems.size();
6067 unsigned int Count = 0;
6068 for (auto Elem : Elems) {
6069 if (!Elem.isUndef()) {
6070 if (!Single.getNode())
6071 Single = Elem;
6072 else if (Elem != Single) {
6073 Single = SDValue();
6074 break;
6075 }
6076 Count += 1;
6077 }
6078 }
6079 // There are three cases here:
6080 //
6081 // - if the only defined element is a loaded one, the best sequence
6082 // is a replicating load.
6083 //
6084 // - otherwise, if the only defined element is an i64 value, we will
6085 // end up with the same VLVGP sequence regardless of whether we short-cut
6086 // for replication or fall through to the later code.
6087 //
6088 // - otherwise, if the only defined element is an i32 or smaller value,
6089 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
6090 // This is only a win if the single defined element is used more than once.
6091 // In other cases we're better off using a single VLVGx.
6092 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
6093 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
6094
6095 // If all elements are loads, use VLREP/VLEs (below).
6096 bool AllLoads = true;
6097 for (auto Elem : Elems)
6098 if (!isVectorElementLoad(Elem)) {
6099 AllLoads = false;
6100 break;
6101 }
6102
6103 // The best way of building a v2i64 from two i64s is to use VLVGP.
6104 if (VT == MVT::v2i64 && !AllLoads)
6105 return joinDwords(DAG, DL, Elems[0], Elems[1]);
6106
6107 // Use a 64-bit merge high to combine two doubles.
6108 if (VT == MVT::v2f64 && !AllLoads)
6109 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6110
6111 // Build v4f32 values directly from the FPRs:
6112 //
6113 // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
6114 // V V VMRHF
6115 // <ABxx> <CDxx>
6116 // V VMRHG
6117 // <ABCD>
6118 if (VT == MVT::v4f32 && !AllLoads) {
6119 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6120 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
6121 // Avoid unnecessary undefs by reusing the other operand.
6122 if (Op01.isUndef())
6123 Op01 = Op23;
6124 else if (Op23.isUndef())
6125 Op23 = Op01;
6126 // Merging identical replications is a no-op.
6127 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
6128 return Op01;
6129 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
6130 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
6131 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
6132 DL, MVT::v2i64, Op01, Op23);
6133 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6134 }
6135
6136 // Collect the constant terms.
6137 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
6138 SmallVector<bool, 16> Done(NumElements, false);
6139
6140 unsigned NumConstants = 0;
6141 for (unsigned I = 0; I < NumElements; ++I) {
6142 SDValue Elem = Elems[I];
6143 if (Elem.getOpcode() == ISD::Constant ||
6144 Elem.getOpcode() == ISD::ConstantFP) {
6145 NumConstants += 1;
6146 Constants[I] = Elem;
6147 Done[I] = true;
6148 }
6149 }
6150 // If there was at least one constant, fill in the other elements of
6151 // Constants with undefs to get a full vector constant and use that
6152 // as the starting point.
6153 SDValue Result;
6154 SDValue ReplicatedVal;
6155 if (NumConstants > 0) {
6156 for (unsigned I = 0; I < NumElements; ++I)
6157 if (!Constants[I].getNode())
6158 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
6159 Result = DAG.getBuildVector(VT, DL, Constants);
6160 } else {
6161 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
6162 // avoid a false dependency on any previous contents of the vector
6163 // register.
6164
6165 // Use a VLREP if at least one element is a load. Make sure to replicate
6166 // the load with the most elements having its value.
6167 std::map<const SDNode*, unsigned> UseCounts;
6168 SDNode *LoadMaxUses = nullptr;
6169 for (unsigned I = 0; I < NumElements; ++I)
6170 if (isVectorElementLoad(Elems[I])) {
6171 SDNode *Ld = Elems[I].getNode();
6172 UseCounts[Ld]++;
6173 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
6174 LoadMaxUses = Ld;
6175 }
6176 if (LoadMaxUses != nullptr) {
6177 ReplicatedVal = SDValue(LoadMaxUses, 0);
6178 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
6179 } else {
6180 // Try to use VLVGP.
6181 unsigned I1 = NumElements / 2 - 1;
6182 unsigned I2 = NumElements - 1;
6183 bool Def1 = !Elems[I1].isUndef();
6184 bool Def2 = !Elems[I2].isUndef();
6185 if (Def1 || Def2) {
6186 SDValue Elem1 = Elems[Def1 ? I1 : I2];
6187 SDValue Elem2 = Elems[Def2 ? I2 : I1];
6188 Result = DAG.getNode(ISD::BITCAST, DL, VT,
6189 joinDwords(DAG, DL, Elem1, Elem2));
6190 Done[I1] = true;
6191 Done[I2] = true;
6192 } else
6193 Result = DAG.getUNDEF(VT);
6194 }
6195 }
6196
6197 // Use VLVGx to insert the other elements.
6198 for (unsigned I = 0; I < NumElements; ++I)
6199 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
6200 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
6201 DAG.getConstant(I, DL, MVT::i32));
6202 return Result;
6203}
6204
6205SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
6206 SelectionDAG &DAG) const {
6207 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
6208 SDLoc DL(Op);
6209 EVT VT = Op.getValueType();
6210
6211 if (BVN->isConstant()) {
6212 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
6213 return Op;
6214
6215 // Fall back to loading it from memory.
6216 return SDValue();
6217 }
6218
6219 // See if we should use shuffles to construct the vector from other vectors.
6220 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
6221 return Res;
6222
6223 // Detect SCALAR_TO_VECTOR conversions.
6224 if (isScalarToVector(Op))
6225 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
6226
6227 // Otherwise use buildVector to build the vector up from GPRs.
6228 unsigned NumElements = Op.getNumOperands();
6229 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
6230 for (unsigned I = 0; I < NumElements; ++I)
6231 Ops[I] = Op.getOperand(I);
6232 return buildVector(DAG, DL, VT, Ops);
6233}
6234
6235SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
6236 SelectionDAG &DAG) const {
6237 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
6238 SDLoc DL(Op);
6239 EVT VT = Op.getValueType();
6240 unsigned NumElements = VT.getVectorNumElements();
6241
6242 if (VSN->isSplat()) {
6243 SDValue Op0 = Op.getOperand(0);
6244 unsigned Index = VSN->getSplatIndex();
6245 assert(Index < VT.getVectorNumElements() &&
6246 "Splat index should be defined and in first operand");
6247 // See whether the value we're splatting is directly available as a scalar.
6248 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6249 Op0.getOpcode() == ISD::BUILD_VECTOR)
6250 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
6251 // Otherwise keep it as a vector-to-vector operation.
6252 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
6253 DAG.getTargetConstant(Index, DL, MVT::i32));
6254 }
6255
6256 GeneralShuffle GS(VT);
6257 for (unsigned I = 0; I < NumElements; ++I) {
6258 int Elt = VSN->getMaskElt(I);
6259 if (Elt < 0)
6260 GS.addUndef();
6261 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
6262 unsigned(Elt) % NumElements))
6263 return SDValue();
6264 }
6265 return GS.getNode(DAG, SDLoc(VSN));
6266}
6267
6268SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
6269 SelectionDAG &DAG) const {
6270 SDLoc DL(Op);
6271 // Just insert the scalar into element 0 of an undefined vector.
6272 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
6273 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
6274 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
6275}
6276
6277SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
6278 SelectionDAG &DAG) const {
6279 // Handle insertions of floating-point values.
6280 SDLoc DL(Op);
6281 SDValue Op0 = Op.getOperand(0);
6282 SDValue Op1 = Op.getOperand(1);
6283 SDValue Op2 = Op.getOperand(2);
6284 EVT VT = Op.getValueType();
6285
6286 // Insertions into constant indices of a v2f64 can be done using VPDI.
6287 // However, if the inserted value is a bitcast or a constant then it's
6288 // better to use GPRs, as below.
6289 if (VT == MVT::v2f64 &&
6290 Op1.getOpcode() != ISD::BITCAST &&
6291 Op1.getOpcode() != ISD::ConstantFP &&
6292 Op2.getOpcode() == ISD::Constant) {
6293 uint64_t Index = Op2->getAsZExtVal();
6294 unsigned Mask = VT.getVectorNumElements() - 1;
6295 if (Index <= Mask)
6296 return Op;
6297 }
6298
6299 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
6300 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
6301 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
6302 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
6303 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
6304 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
6305 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6306}
6307
6308SDValue
6309SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
6310 SelectionDAG &DAG) const {
6311 // Handle extractions of floating-point values.
6312 SDLoc DL(Op);
6313 SDValue Op0 = Op.getOperand(0);
6314 SDValue Op1 = Op.getOperand(1);
6315 EVT VT = Op.getValueType();
6316 EVT VecVT = Op0.getValueType();
6317
6318 // Extractions of constant indices can be done directly.
6319 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
6320 uint64_t Index = CIndexN->getZExtValue();
6321 unsigned Mask = VecVT.getVectorNumElements() - 1;
6322 if (Index <= Mask)
6323 return Op;
6324 }
6325
6326 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
6327 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
6328 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
6329 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
6330 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
6331 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6332}
6333
6334SDValue SystemZTargetLowering::
6335lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6336 SDValue PackedOp = Op.getOperand(0);
6337 EVT OutVT = Op.getValueType();
6338 EVT InVT = PackedOp.getValueType();
6339 unsigned ToBits = OutVT.getScalarSizeInBits();
6340 unsigned FromBits = InVT.getScalarSizeInBits();
6341 do {
6342 FromBits *= 2;
6343 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
6344 SystemZ::VectorBits / FromBits);
6345 PackedOp =
6346 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
6347 } while (FromBits != ToBits);
6348 return PackedOp;
6349}
6350
6351// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
6352SDValue SystemZTargetLowering::
6353lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6354 SDValue PackedOp = Op.getOperand(0);
6355 SDLoc DL(Op);
6356 EVT OutVT = Op.getValueType();
6357 EVT InVT = PackedOp.getValueType();
6358 unsigned InNumElts = InVT.getVectorNumElements();
6359 unsigned OutNumElts = OutVT.getVectorNumElements();
6360 unsigned NumInPerOut = InNumElts / OutNumElts;
6361
6362 SDValue ZeroVec =
6363 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
6364
6365 SmallVector<int, 16> Mask(InNumElts);
6366 unsigned ZeroVecElt = InNumElts;
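// Build a mask that places NumInPerOut - 1 elements of the zero vector in
// front of each source element; being big-endian, this leaves the original
// value in the least significant part of each widened element.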
6367 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
6368 unsigned MaskElt = PackedElt * NumInPerOut;
6369 unsigned End = MaskElt + NumInPerOut - 1;
6370 for (; MaskElt < End; MaskElt++)
6371 Mask[MaskElt] = ZeroVecElt++;
6372 Mask[MaskElt] = PackedElt;
6373 }
6374 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6375 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6376}
6377
6378SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6379 unsigned ByScalar) const {
6380 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6381 SDValue Op0 = Op.getOperand(0);
6382 SDValue Op1 = Op.getOperand(1);
6383 SDLoc DL(Op);
6384 EVT VT = Op.getValueType();
6385 unsigned ElemBitSize = VT.getScalarSizeInBits();
6386
6387 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6388 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6389 APInt SplatBits, SplatUndef;
6390 unsigned SplatBitSize;
6391 bool HasAnyUndefs;
6392 // Check for constant splats. Use ElemBitSize as the minimum element
6393 // width and reject splats that need wider elements.
6394 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6395 ElemBitSize, true) &&
6396 SplatBitSize == ElemBitSize) {
6397 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6398 DL, MVT::i32);
6399 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6400 }
6401 // Check for variable splats.
6402 BitVector UndefElements;
6403 SDValue Splat = BVN->getSplatValue(&UndefElements);
6404 if (Splat) {
6405 // Since i32 is the smallest legal type, we either need a no-op
6406 // or a truncation.
6407 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6408 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6409 }
6410 }
6411
6412 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6413 // and the shift amount is directly available in a GPR.
6414 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6415 if (VSN->isSplat()) {
6416 SDValue VSNOp0 = VSN->getOperand(0);
6417 unsigned Index = VSN->getSplatIndex();
6418 assert(Index < VT.getVectorNumElements() &&
6419 "Splat index should be defined and in first operand");
6420 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6421 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6422 // Since i32 is the smallest legal type, we either need a no-op
6423 // or a truncation.
6424 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6425 VSNOp0.getOperand(Index));
6426 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6427 }
6428 }
6429 }
6430
6431 // Otherwise just treat the current form as legal.
6432 return Op;
6433}
6434
6435static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG) {
6436 SDLoc dl(Op);
6437 SDValue Src = Op.getOperand(0);
6438 MVT DstVT = Op.getSimpleValueType();
6439
6440 AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());
6441 unsigned SrcAS = N->getSrcAddressSpace();
6442
6443 assert(SrcAS != N->getDestAddressSpace() &&
6444 "addrspacecast must be between different address spaces");
6445
6446 // addrspacecast [0 <- 1] : Assigning a ptr32 value to a 64-bit pointer.
6447 // addrspacecast [1 <- 0] : Assigning a 64-bit pointer to a ptr32 value.
6448 if (SrcAS == SYSTEMZAS::PTR32 && DstVT == MVT::i64) {
6449 Op = DAG.getNode(ISD::AND, dl, MVT::i32, Src,
6450 DAG.getConstant(0x7fffffff, dl, MVT::i32));
6451 Op = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op);
6452 } else if (DstVT == MVT::i32) {
6453 Op = DAG.getNode(ISD::TRUNCATE, dl, DstVT, Src);
6454 Op = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
6455 DAG.getConstant(0x7fffffff, dl, MVT::i32));
6456 Op = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op);
6457 } else {
6458 report_fatal_error("Bad address space in addrspacecast");
6459 }
6460 return Op;
6461}
6462
6463SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
6464 SelectionDAG &DAG) const {
6465 SDLoc DL(Op);
6466 MVT ResultVT = Op.getSimpleValueType();
6467 SDValue Arg = Op.getOperand(0);
6468 unsigned Check = Op.getConstantOperandVal(1);
6469
6470 unsigned TDCMask = 0;
6471 if (Check & fcSNan)
6472 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
6473 if (Check & fcQNan)
6474 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
6475 if (Check & fcPosInf)
6476 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
6477 if (Check & fcNegInf)
6478 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
6479 if (Check & fcPosNormal)
6480 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
6481 if (Check & fcNegNormal)
6482 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
6483 if (Check & fcPosSubnormal)
6484 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
6485 if (Check & fcNegSubnormal)
6486 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
6487 if (Check & fcPosZero)
6488 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
6489 if (Check & fcNegZero)
6490 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
6491 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
6492
6493 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
6494 return getCCResult(DAG, Intr);
6495}
6496
6497SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
6498 SelectionDAG &DAG) const {
6499 SDLoc DL(Op);
6500 SDValue Chain = Op.getOperand(0);
6501
6502 // STCKF only supports a memory operand, so we have to use a temporary.
6503 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
6504 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6505 MachinePointerInfo MPI =
6506 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
6507
6508 // Use STCKF to store the TOD clock into the temporary.
6509 SDValue StoreOps[] = {Chain, StackPtr};
6510 Chain = DAG.getMemIntrinsicNode(
6511 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
6512 MPI, MaybeAlign(), MachineMemOperand::MOStore);
6513
6514 // And read it back from there.
6515 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
6516}
6517
6518SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
6519 SelectionDAG &DAG) const {
6520 switch (Op.getOpcode()) {
6521 case ISD::FRAMEADDR:
6522 return lowerFRAMEADDR(Op, DAG);
6523 case ISD::RETURNADDR:
6524 return lowerRETURNADDR(Op, DAG);
6525 case ISD::BR_CC:
6526 return lowerBR_CC(Op, DAG);
6527 case ISD::SELECT_CC:
6528 return lowerSELECT_CC(Op, DAG);
6529 case ISD::SETCC:
6530 return lowerSETCC(Op, DAG);
6531 case ISD::STRICT_FSETCC:
6532 return lowerSTRICT_FSETCC(Op, DAG, false);
6533 case ISD::STRICT_FSETCCS:
6534 return lowerSTRICT_FSETCC(Op, DAG, true);
6535 case ISD::GlobalAddress:
6536 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
6537 case ISD::GlobalTLSAddress:
6538 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
6539 case ISD::BlockAddress:
6540 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
6541 case ISD::JumpTable:
6542 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
6543 case ISD::ConstantPool:
6544 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
6545 case ISD::BITCAST:
6546 return lowerBITCAST(Op, DAG);
6547 case ISD::VASTART:
6548 return lowerVASTART(Op, DAG);
6549 case ISD::VACOPY:
6550 return lowerVACOPY(Op, DAG);
6551 case ISD::DYNAMIC_STACKALLOC:
6552 return lowerDYNAMIC_STACKALLOC(Op, DAG);
6553 case ISD::GET_DYNAMIC_AREA_OFFSET:
6554 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
6555 case ISD::MULHS:
6556 return lowerMULH(Op, DAG, SystemZISD::SMUL_LOHI);
6557 case ISD::MULHU:
6558 return lowerMULH(Op, DAG, SystemZISD::UMUL_LOHI);
6559 case ISD::SMUL_LOHI:
6560 return lowerSMUL_LOHI(Op, DAG);
6561 case ISD::UMUL_LOHI:
6562 return lowerUMUL_LOHI(Op, DAG);
6563 case ISD::SDIVREM:
6564 return lowerSDIVREM(Op, DAG);
6565 case ISD::UDIVREM:
6566 return lowerUDIVREM(Op, DAG);
6567 case ISD::SADDO:
6568 case ISD::SSUBO:
6569 case ISD::UADDO:
6570 case ISD::USUBO:
6571 return lowerXALUO(Op, DAG);
6572 case ISD::UADDO_CARRY:
6573 case ISD::USUBO_CARRY:
6574 return lowerUADDSUBO_CARRY(Op, DAG);
6575 case ISD::OR:
6576 return lowerOR(Op, DAG);
6577 case ISD::CTPOP:
6578 return lowerCTPOP(Op, DAG);
6579 case ISD::VECREDUCE_ADD:
6580 return lowerVECREDUCE_ADD(Op, DAG);
6581 case ISD::ATOMIC_FENCE:
6582 return lowerATOMIC_FENCE(Op, DAG);
6583 case ISD::ATOMIC_SWAP:
6584 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
6585 case ISD::ATOMIC_STORE:
6586 case ISD::ATOMIC_LOAD:
6587 return lowerATOMIC_LDST_I128(Op, DAG);
6588 case ISD::ATOMIC_LOAD_ADD:
6589 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
6590 case ISD::ATOMIC_LOAD_SUB:
6591 return lowerATOMIC_LOAD_SUB(Op, DAG);
6592 case ISD::ATOMIC_LOAD_AND:
6593 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
6594 case ISD::ATOMIC_LOAD_OR:
6595 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
6596 case ISD::ATOMIC_LOAD_XOR:
6597 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
6598 case ISD::ATOMIC_LOAD_NAND:
6599 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
6600 case ISD::ATOMIC_LOAD_MIN:
6601 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
6602 case ISD::ATOMIC_LOAD_MAX:
6603 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
6604 case ISD::ATOMIC_LOAD_UMIN:
6605 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
6606 case ISD::ATOMIC_LOAD_UMAX:
6607 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
6608 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
6609 return lowerATOMIC_CMP_SWAP(Op, DAG);
6610 case ISD::STACKSAVE:
6611 return lowerSTACKSAVE(Op, DAG);
6612 case ISD::STACKRESTORE:
6613 return lowerSTACKRESTORE(Op, DAG);
6614 case ISD::PREFETCH:
6615 return lowerPREFETCH(Op, DAG);
6616 case ISD::INTRINSIC_W_CHAIN:
6617 return lowerINTRINSIC_W_CHAIN(Op, DAG);
6618 case ISD::INTRINSIC_WO_CHAIN:
6619 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
6620 case ISD::BUILD_VECTOR:
6621 return lowerBUILD_VECTOR(Op, DAG);
6622 case ISD::VECTOR_SHUFFLE:
6623 return lowerVECTOR_SHUFFLE(Op, DAG);
6624 case ISD::SCALAR_TO_VECTOR:
6625 return lowerSCALAR_TO_VECTOR(Op, DAG);
6626 case ISD::INSERT_VECTOR_ELT:
6627 return lowerINSERT_VECTOR_ELT(Op, DAG);
6628 case ISD::EXTRACT_VECTOR_ELT:
6629 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6630 case ISD::SIGN_EXTEND_VECTOR_INREG:
6631 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
6632 case ISD::ZERO_EXTEND_VECTOR_INREG:
6633 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
6634 case ISD::SHL:
6635 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
6636 case ISD::SRL:
6637 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
6638 case ISD::SRA:
6639 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
6640 case ISD::ADDRSPACECAST:
6641 return lowerAddrSpaceCast(Op, DAG);
6642 case ISD::ROTL:
6643 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
6644 case ISD::IS_FPCLASS:
6645 return lowerIS_FPCLASS(Op, DAG);
6646 case ISD::GET_ROUNDING:
6647 return lowerGET_ROUNDING(Op, DAG);
6648 case ISD::READCYCLECOUNTER:
6649 return lowerREADCYCLECOUNTER(Op, DAG);
6650 case ISD::EH_SJLJ_SETJMP:
6651 case ISD::EH_SJLJ_LONGJMP:
6652 // These operations are legal on our platform, but we cannot actually
6653 // set the operation action to Legal as common code would treat this
6654 // as equivalent to Expand. Instead, we keep the operation action to
6655 // Custom and just leave them unchanged here.
6656 return Op;
6657
6658 default:
6659 llvm_unreachable("Unexpected node to lower");
6660 }
6661}
6662
6663static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
6664 const SDLoc &SL) {
6665 // If i128 is legal, just use a normal bitcast.
6666 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
6667 return DAG.getBitcast(MVT::f128, Src);
6668
6669 // Otherwise, f128 must live in FP128, so do a partwise move.
6670 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
6671 &SystemZ::FP128BitRegClass);
6672
6673 SDValue Hi, Lo;
6674 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
6675
6676 Hi = DAG.getBitcast(MVT::f64, Hi);
6677 Lo = DAG.getBitcast(MVT::f64, Lo);
6678
6679 SDNode *Pair = DAG.getMachineNode(
6680 SystemZ::REG_SEQUENCE, SL, MVT::f128,
6681 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
6682 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
6683 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
6684 return SDValue(Pair, 0);
6685}
6686
6687static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
6688 const SDLoc &SL) {
6689 // If i128 is legal, just use a normal bitcast.
6690 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
6691 return DAG.getBitcast(MVT::i128, Src);
6692
6693 // Otherwise, f128 must live in FP128, so do a partwise move.
6694 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
6695 &SystemZ::FP128BitRegClass);
6696
6697 SDValue LoFP =
6698 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
6699 SDValue HiFP =
6700 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
6701 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
6702 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
6703
6704 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
6705}
6706
6707// Lower operations with invalid operand or result types (currently used
6708// only for 128-bit integer types).
6709void
6710SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
6711 SmallVectorImpl<SDValue> &Results,
6712 SelectionDAG &DAG) const {
6713 switch (N->getOpcode()) {
6714 case ISD::ATOMIC_LOAD: {
6715 SDLoc DL(N);
6716 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
6717 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
6718 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6719 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
6720 DL, Tys, Ops, MVT::i128, MMO);
6721
6722 SDValue Lowered = lowerGR128ToI128(DAG, Res);
6723 if (N->getValueType(0) == MVT::f128)
6724 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
6725 Results.push_back(Lowered);
6726 Results.push_back(Res.getValue(1));
6727 break;
6728 }
6729 case ISD::ATOMIC_STORE: {
6730 SDLoc DL(N);
6731 SDVTList Tys = DAG.getVTList(MVT::Other);
6732 SDValue Val = N->getOperand(1);
6733 if (Val.getValueType() == MVT::f128)
6734 Val = expandBitCastF128ToI128(DAG, Val, DL);
6735 Val = lowerI128ToGR128(DAG, Val);
6736
6737 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
6738 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6739 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
6740 DL, Tys, Ops, MVT::i128, MMO);
6741 // We have to enforce sequential consistency by performing a
6742 // serialization operation after the store.
6743 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
6744 AtomicOrdering::SequentiallyConsistent)
6745 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
6746 MVT::Other, Res), 0);
6747 Results.push_back(Res);
6748 break;
6749 }
6750 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
6751 SDLoc DL(N);
6752 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
6753 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
6754 lowerI128ToGR128(DAG, N->getOperand(2)),
6755 lowerI128ToGR128(DAG, N->getOperand(3)) };
6756 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6757 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
6758 DL, Tys, Ops, MVT::i128, MMO);
6759 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
6760 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
6761 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
6762 Results.push_back(lowerGR128ToI128(DAG, Res));
6763 Results.push_back(Success);
6764 Results.push_back(Res.getValue(2));
6765 break;
6766 }
6767 case ISD::BITCAST: {
6768 SDValue Src = N->getOperand(0);
6769 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
6770 !useSoftFloat()) {
6771 SDLoc DL(N);
6772 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
6773 }
6774 break;
6775 }
6776 default:
6777 llvm_unreachable("Unexpected node to lower");
6778 }
6779}
6780
6781void
6782SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
6783 SmallVectorImpl<SDValue> &Results,
6784 SelectionDAG &DAG) const {
6785 return LowerOperationWrapper(N, Results, DAG);
6786}
6787
6788const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
6789#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
6790 switch ((SystemZISD::NodeType)Opcode) {
6791 case SystemZISD::FIRST_NUMBER: break;
6792 OPCODE(RET_GLUE);
6793 OPCODE(CALL);
6794 OPCODE(SIBCALL);
6795 OPCODE(TLS_GDCALL);
6796 OPCODE(TLS_LDCALL);
6797 OPCODE(PCREL_WRAPPER);
6798 OPCODE(PCREL_OFFSET);
6799 OPCODE(ICMP);
6800 OPCODE(FCMP);
6801 OPCODE(STRICT_FCMP);
6802 OPCODE(STRICT_FCMPS);
6803 OPCODE(TM);
6804 OPCODE(BR_CCMASK);
6805 OPCODE(SELECT_CCMASK);
6806 OPCODE(ADJDYNALLOC);
6807 OPCODE(PROBED_ALLOCA);
6808 OPCODE(POPCNT);
6809 OPCODE(SMUL_LOHI);
6810 OPCODE(UMUL_LOHI);
6811 OPCODE(SDIVREM);
6812 OPCODE(UDIVREM);
6813 OPCODE(SADDO);
6814 OPCODE(SSUBO);
6815 OPCODE(UADDO);
6816 OPCODE(USUBO);
6817 OPCODE(ADDCARRY);
6818 OPCODE(SUBCARRY);
6819 OPCODE(GET_CCMASK);
6820 OPCODE(MVC);
6821 OPCODE(NC);
6822 OPCODE(OC);
6823 OPCODE(XC);
6824 OPCODE(CLC);
6825 OPCODE(MEMSET_MVC);
6826 OPCODE(STPCPY);
6827 OPCODE(STRCMP);
6828 OPCODE(SEARCH_STRING);
6829 OPCODE(IPM);
6830 OPCODE(TBEGIN);
6831 OPCODE(TBEGIN_NOFLOAT);
6832 OPCODE(TEND);
6833 OPCODE(BYTE_MASK);
6834 OPCODE(ROTATE_MASK);
6835 OPCODE(REPLICATE);
6836 OPCODE(JOIN_DWORDS);
6837 OPCODE(SPLAT);
6838 OPCODE(MERGE_HIGH);
6839 OPCODE(MERGE_LOW);
6840 OPCODE(SHL_DOUBLE);
6841 OPCODE(PERMUTE_DWORDS);
6842 OPCODE(PERMUTE);
6843 OPCODE(PACK);
6844 OPCODE(PACKS_CC);
6845 OPCODE(PACKLS_CC);
6846 OPCODE(UNPACK_HIGH);
6847 OPCODE(UNPACKL_HIGH);
6848 OPCODE(UNPACK_LOW);
6849 OPCODE(UNPACKL_LOW);
6850 OPCODE(VSHL_BY_SCALAR);
6851 OPCODE(VSRL_BY_SCALAR);
6852 OPCODE(VSRA_BY_SCALAR);
6853 OPCODE(VROTL_BY_SCALAR);
6854 OPCODE(VSUM);
6855 OPCODE(VACC);
6856 OPCODE(VSCBI);
6857 OPCODE(VAC);
6858 OPCODE(VSBI);
6859 OPCODE(VACCC);
6860 OPCODE(VSBCBI);
6861 OPCODE(VICMPE);
6862 OPCODE(VICMPH);
6863 OPCODE(VICMPHL);
6864 OPCODE(VICMPES);
6865 OPCODE(VICMPHS);
6866 OPCODE(VICMPHLS);
6867 OPCODE(VFCMPE);
6868 OPCODE(STRICT_VFCMPE);
6869 OPCODE(STRICT_VFCMPES);
6870 OPCODE(VFCMPH);
6871 OPCODE(STRICT_VFCMPH);
6872 OPCODE(STRICT_VFCMPHS);
6873 OPCODE(VFCMPHE);
6874 OPCODE(STRICT_VFCMPHE);
6875 OPCODE(STRICT_VFCMPHES);
6876 OPCODE(VFCMPES);
6877 OPCODE(VFCMPHS);
6878 OPCODE(VFCMPHES);
6879 OPCODE(VFTCI);
6880 OPCODE(VEXTEND);
6881 OPCODE(STRICT_VEXTEND);
6882 OPCODE(VROUND);
6883 OPCODE(STRICT_VROUND);
6884 OPCODE(VTM);
6885 OPCODE(SCMP128HI);
6886 OPCODE(UCMP128HI);
6887 OPCODE(VFAE_CC);
6888 OPCODE(VFAEZ_CC);
6889 OPCODE(VFEE_CC);
6890 OPCODE(VFEEZ_CC);
6891 OPCODE(VFENE_CC);
6892 OPCODE(VFENEZ_CC);
6893 OPCODE(VISTR_CC);
6894 OPCODE(VSTRC_CC);
6895 OPCODE(VSTRCZ_CC);
6896 OPCODE(VSTRS_CC);
6897 OPCODE(VSTRSZ_CC);
6898 OPCODE(TDC);
6899 OPCODE(ATOMIC_SWAPW);
6900 OPCODE(ATOMIC_LOADW_ADD);
6901 OPCODE(ATOMIC_LOADW_SUB);
6902 OPCODE(ATOMIC_LOADW_AND);
6903 OPCODE(ATOMIC_LOADW_OR);
6904 OPCODE(ATOMIC_LOADW_XOR);
6905 OPCODE(ATOMIC_LOADW_NAND);
6906 OPCODE(ATOMIC_LOADW_MIN);
6907 OPCODE(ATOMIC_LOADW_MAX);
6908 OPCODE(ATOMIC_LOADW_UMIN);
6909 OPCODE(ATOMIC_LOADW_UMAX);
6910 OPCODE(ATOMIC_CMP_SWAPW);
6911 OPCODE(ATOMIC_CMP_SWAP);
6912 OPCODE(ATOMIC_LOAD_128);
6913 OPCODE(ATOMIC_STORE_128);
6914 OPCODE(ATOMIC_CMP_SWAP_128);
6915 OPCODE(LRV);
6916 OPCODE(STRV);
6917 OPCODE(VLER);
6918 OPCODE(VSTER);
6919 OPCODE(STCKF);
6920 OPCODE(PREFETCH);
6921 OPCODE(ADA_ENTRY);
6922 }
6923 return nullptr;
6924#undef OPCODE
6925}
6926
6927// Return true if VT is a vector whose elements are a whole number of bytes
6928// in width. Also check for presence of vector support.
6929bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
6930 if (!Subtarget.hasVector())
6931 return false;
6932
6933 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
6934}
6935
6936// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
6937// producing a result of type ResVT. Op is a possibly bitcast version
6938// of the input vector and Index is the index (based on type VecVT) that
6939// should be extracted. Return the new extraction if a simplification
6940// was possible or if Force is true.
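// Illustration: extracting i32 element 1 from a bitcast v16i8 shuffle whose
// bytes 4..7 are a contiguous copy of bytes 8..11 of its first input reduces
// to extracting i32 element 2 directly from that input.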
6941SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
6942 EVT VecVT, SDValue Op,
6943 unsigned Index,
6944 DAGCombinerInfo &DCI,
6945 bool Force) const {
6946 SelectionDAG &DAG = DCI.DAG;
6947
6948 // The number of bytes being extracted.
6949 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6950
6951 for (;;) {
6952 unsigned Opcode = Op.getOpcode();
6953 if (Opcode == ISD::BITCAST)
6954 // Look through bitcasts.
6955 Op = Op.getOperand(0);
6956 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
6957 canTreatAsByteVector(Op.getValueType())) {
6958 // Get a VPERM-like permute mask and see whether the bytes covered
6959 // by the extracted element are a contiguous sequence from one
6960 // source operand.
6961 SmallVector<int, SystemZ::VectorBytes> Bytes;
6962 if (!getVPermMask(Op, Bytes))
6963 break;
6964 int First;
6965 if (!getShuffleInput(Bytes, Index * BytesPerElement,
6966 BytesPerElement, First))
6967 break;
6968 if (First < 0)
6969 return DAG.getUNDEF(ResVT);
6970 // Make sure the contiguous sequence starts at a multiple of the
6971 // original element size.
6972 unsigned Byte = unsigned(First) % Bytes.size();
6973 if (Byte % BytesPerElement != 0)
6974 break;
6975 // We can get the extracted value directly from an input.
6976 Index = Byte / BytesPerElement;
6977 Op = Op.getOperand(unsigned(First) / Bytes.size());
6978 Force = true;
6979 } else if (Opcode == ISD::BUILD_VECTOR &&
6980 canTreatAsByteVector(Op.getValueType())) {
6981 // We can only optimize this case if the BUILD_VECTOR elements are
6982 // at least as wide as the extracted value.
6983 EVT OpVT = Op.getValueType();
6984 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6985 if (OpBytesPerElement < BytesPerElement)
6986 break;
6987 // Make sure that the least-significant bit of the extracted value
6988 // is the least significant bit of an input.
6989 unsigned End = (Index + 1) * BytesPerElement;
6990 if (End % OpBytesPerElement != 0)
6991 break;
6992 // We're extracting the low part of one operand of the BUILD_VECTOR.
6993 Op = Op.getOperand(End / OpBytesPerElement - 1);
6994 if (!Op.getValueType().isInteger()) {
6995 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
6996 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
6997 DCI.AddToWorklist(Op.getNode());
6998 }
6999 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
7000 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7001 if (VT != ResVT) {
7002 DCI.AddToWorklist(Op.getNode());
7003 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
7004 }
7005 return Op;
7006 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
7007 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
7008 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
7009 canTreatAsByteVector(Op.getValueType()) &&
7010 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
7011 // Make sure that only the unextended bits are significant.
7012 EVT ExtVT = Op.getValueType();
7013 EVT OpVT = Op.getOperand(0).getValueType();
7014 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
7015 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7016 unsigned Byte = Index * BytesPerElement;
7017 unsigned SubByte = Byte % ExtBytesPerElement;
7018 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
7019 if (SubByte < MinSubByte ||
7020 SubByte + BytesPerElement > ExtBytesPerElement)
7021 break;
7022 // Get the byte offset of the unextended element
7023 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
7024 // ...then add the byte offset relative to that element.
7025 Byte += SubByte - MinSubByte;
7026 if (Byte % BytesPerElement != 0)
7027 break;
7028 Op = Op.getOperand(0);
7029 Index = Byte / BytesPerElement;
7030 Force = true;
7031 } else
7032 break;
7033 }
7034 if (Force) {
7035 if (Op.getValueType() != VecVT) {
7036 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
7037 DCI.AddToWorklist(Op.getNode());
7038 }
7039 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
7040 DAG.getConstant(Index, DL, MVT::i32));
7041 }
7042 return SDValue();
7043}
7044
7045// Optimize vector operations in scalar value Op on the basis that Op
7046// is truncated to TruncVT.
7047SDValue SystemZTargetLowering::combineTruncateExtract(
7048 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
7049 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
7050 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
7051 // of type TruncVT.
7052 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7053 TruncVT.getSizeInBits() % 8 == 0) {
7054 SDValue Vec = Op.getOperand(0);
7055 EVT VecVT = Vec.getValueType();
7056 if (canTreatAsByteVector(VecVT)) {
7057 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7058 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7059 unsigned TruncBytes = TruncVT.getStoreSize();
7060 if (BytesPerElement % TruncBytes == 0) {
7061 // Calculate the value of Y' in the above description. We are
7062 // splitting the original elements into Scale equal-sized pieces
7063 // and for truncation purposes want the last (least-significant)
7064 // of these pieces for IndexN. This is easiest to do by calculating
7065 // the start index of the following element and then subtracting 1.
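// For example, truncating element Y of a v4i32 to i8 gives Scale = 4 and a
// new index of 4*Y + 3, i.e. the last byte of the element, which holds the
// least-significant byte on this big-endian target.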
7066 unsigned Scale = BytesPerElement / TruncBytes;
7067 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
7068
7069 // Defer the creation of the bitcast from X to combineExtract,
7070 // which might be able to optimize the extraction.
7071 VecVT = EVT::getVectorVT(*DCI.DAG.getContext(),
7072 MVT::getIntegerVT(TruncBytes * 8),
7073 VecVT.getStoreSize() / TruncBytes);
7074 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
7075 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
7076 }
7077 }
7078 }
7079 }
7080 return SDValue();
7081}
7082
7083SDValue SystemZTargetLowering::combineZERO_EXTEND(
7084 SDNode *N, DAGCombinerInfo &DCI) const {
7085 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
7086 SelectionDAG &DAG = DCI.DAG;
7087 SDValue N0 = N->getOperand(0);
7088 EVT VT = N->getValueType(0);
7089 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
7090 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
7091 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7092 if (TrueOp && FalseOp) {
7093 SDLoc DL(N0);
7094 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
7095 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
7096 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
7097 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
7098 // If N0 has multiple uses, change other uses as well.
7099 if (!N0.hasOneUse()) {
7100 SDValue TruncSelect =
7101 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
7102 DCI.CombineTo(N0.getNode(), TruncSelect);
7103 }
7104 return NewSelect;
7105 }
7106 }
7107 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
7108 // of the result is smaller than the size of X and all the truncated bits
7109 // of X are already zero.
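// For example, if X is an i64 whose bits 16..31 are known to be zero,
// (zext i16->i32 (xor (trunc X), C)) becomes (xor (trunc X to i32), C'),
// where C' is C zero-extended to i32.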
7110 if (N0.getOpcode() == ISD::XOR &&
7111 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
7112 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7113 N0.getOperand(1).getOpcode() == ISD::Constant) {
7114 SDValue X = N0.getOperand(0).getOperand(0);
7115 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
7116 KnownBits Known = DAG.computeKnownBits(X);
7117 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
7118 N0.getValueSizeInBits(),
7119 VT.getSizeInBits());
7120 if (TruncatedBits.isSubsetOf(Known.Zero)) {
7121 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7122 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
7123 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
7124 X, DAG.getConstant(Mask, SDLoc(N0), VT));
7125 }
7126 }
7127 }
7128
7129 return SDValue();
7130}
7131
7132SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
7133 SDNode *N, DAGCombinerInfo &DCI) const {
7134 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
7135 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
7136 // into (select_cc LHS, RHS, -1, 0, COND)
7137 SelectionDAG &DAG = DCI.DAG;
7138 SDValue N0 = N->getOperand(0);
7139 EVT VT = N->getValueType(0);
7140 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7141 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
7142 N0 = N0.getOperand(0);
7143 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
7144 SDLoc DL(N0);
7145 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
7146 DAG.getAllOnesConstant(DL, VT),
7147 DAG.getConstant(0, DL, VT), N0.getOperand(2) };
7148 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
7149 }
7150 return SDValue();
7151}
7152
7153SDValue SystemZTargetLowering::combineSIGN_EXTEND(
7154 SDNode *N, DAGCombinerInfo &DCI) const {
7155 // Convert (sext (ashr (shl X, C1), C2)) to
7156 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
7157 // cheap as narrower ones.
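// For example, (sext i32->i64 (ashr (shl X, 24), 24)) becomes
// (ashr (shl (anyext X to i64), 56), 56): both shift amounts grow by the
// 32 extra bits of the wider type.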
7158 SelectionDAG &DAG = DCI.DAG;
7159 SDValue N0 = N->getOperand(0);
7160 EVT VT = N->getValueType(0);
7161 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
7162 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7163 SDValue Inner = N0.getOperand(0);
7164 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
7165 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
7166 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
7167 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
7168 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
7169 EVT ShiftVT = N0.getOperand(1).getValueType();
7170 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
7171 Inner.getOperand(0));
7172 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
7173 DAG.getConstant(NewShlAmt, SDLoc(Inner),
7174 ShiftVT));
7175 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
7176 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
7177 }
7178 }
7179 }
7180
7181 return SDValue();
7182}
7183
7184SDValue SystemZTargetLowering::combineMERGE(
7185 SDNode *N, DAGCombinerInfo &DCI) const {
7186 SelectionDAG &DAG = DCI.DAG;
7187 unsigned Opcode = N->getOpcode();
7188 SDValue Op0 = N->getOperand(0);
7189 SDValue Op1 = N->getOperand(1);
7190 if (Op0.getOpcode() == ISD::BITCAST)
7191 Op0 = Op0.getOperand(0);
7192 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
7193 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
7194 // for v4f32.
7195 if (Op1 == N->getOperand(0))
7196 return Op1;
7197 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
7198 EVT VT = Op1.getValueType();
7199 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
7200 if (ElemBytes <= 4) {
7201 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
7202 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
7203 EVT InVT = VT.changeVectorElementTypeToInteger();
7204 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
7205 SystemZ::VectorBytes / ElemBytes / 2);
7206 if (VT != InVT) {
7207 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
7208 DCI.AddToWorklist(Op1.getNode());
7209 }
7210 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
7211 DCI.AddToWorklist(Op.getNode());
7212 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
7213 }
7214 }
7215 return SDValue();
7216}
7217
7218static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7219 SDNode *&HiPart) {
7220 LoPart = HiPart = nullptr;
7221
7222 // Scan through all users.
7223 for (SDUse &Use : LD->uses()) {
7224 // Skip the uses of the chain.
7225 if (Use.getResNo() != 0)
7226 continue;
7227
7228 // Verify every user is a TRUNCATE to i64 of the low or high half.
7229 SDNode *User = Use.getUser();
7230 bool IsLoPart = true;
7231 if (User->getOpcode() == ISD::SRL &&
7232 User->getOperand(1).getOpcode() == ISD::Constant &&
7233 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
7234 User = *User->user_begin();
7235 IsLoPart = false;
7236 }
7237 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
7238 return false;
7239
7240 if (IsLoPart) {
7241 if (LoPart)
7242 return false;
7243 LoPart = User;
7244 } else {
7245 if (HiPart)
7246 return false;
7247 HiPart = User;
7248 }
7249 }
7250 return true;
7251}
7252
7253static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7254 SDNode *&HiPart) {
7255 LoPart = HiPart = nullptr;
7256
7257 // Scan through all users.
7258 for (SDUse &Use : LD->uses()) {
7259 // Skip the uses of the chain.
7260 if (Use.getResNo() != 0)
7261 continue;
7262
7263 // Verify every user is an EXTRACT_SUBREG of the low or high half.
7264 SDNode *User = Use.getUser();
7265 if (!User->hasOneUse() || !User->isMachineOpcode() ||
7266 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
7267 return false;
7268
7269 switch (User->getConstantOperandVal(1)) {
7270 case SystemZ::subreg_l64:
7271 if (LoPart)
7272 return false;
7273 LoPart = User;
7274 break;
7275 case SystemZ::subreg_h64:
7276 if (HiPart)
7277 return false;
7278 HiPart = User;
7279 break;
7280 default:
7281 return false;
7282 }
7283 }
7284 return true;
7285}
7286
7287SDValue SystemZTargetLowering::combineLOAD(
7288 SDNode *N, DAGCombinerInfo &DCI) const {
7289 SelectionDAG &DAG = DCI.DAG;
7290 EVT LdVT = N->getValueType(0);
7291 if (auto *LN = dyn_cast<LoadSDNode>(N)) {
7292 if (LN->getAddressSpace() == SYSTEMZAS::PTR32) {
7293 MVT PtrVT = getPointerTy(DAG.getDataLayout());
7294 MVT LoadNodeVT = LN->getBasePtr().getSimpleValueType();
7295 if (PtrVT != LoadNodeVT) {
7296 SDLoc DL(LN);
7297 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(
7298 DL, PtrVT, LN->getBasePtr(), SYSTEMZAS::PTR32, 0);
7299 return DAG.getExtLoad(LN->getExtensionType(), DL, LN->getValueType(0),
7300 LN->getChain(), AddrSpaceCast, LN->getMemoryVT(),
7301 LN->getMemOperand());
7302 }
7303 }
7304 }
7305 SDLoc DL(N);
7306
7307 // Replace a 128-bit load that is used solely to move its value into GPRs
7308 // by separate loads of both halves.
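// On this big-endian target the high 64 bits live at offset 0 and the low
// 64 bits at offset 8, hence the +8 offset on the LoPart load below.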
7309 LoadSDNode *LD = cast<LoadSDNode>(N);
7310 if (LD->isSimple() && ISD::isNormalLoad(LD)) {
7311 SDNode *LoPart, *HiPart;
7312 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
7313 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
7314 // Rewrite each extraction as an independent load.
7315 SmallVector<SDValue, 2> ArgChains;
7316 if (HiPart) {
7317 SDValue EltLoad = DAG.getLoad(
7318 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
7319 LD->getPointerInfo(), LD->getOriginalAlign(),
7320 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7321
7322 DCI.CombineTo(HiPart, EltLoad, true);
7323 ArgChains.push_back(EltLoad.getValue(1));
7324 }
7325 if (LoPart) {
7326 SDValue EltLoad = DAG.getLoad(
7327 LoPart->getValueType(0), DL, LD->getChain(),
7328 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
7329 LD->getPointerInfo().getWithOffset(8), LD->getOriginalAlign(),
7330 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7331
7332 DCI.CombineTo(LoPart, EltLoad, true);
7333 ArgChains.push_back(EltLoad.getValue(1));
7334 }
7335
7336 // Collect all chains via TokenFactor.
7337 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
7338 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
7339 DCI.AddToWorklist(Chain.getNode());
7340 return SDValue(N, 0);
7341 }
7342 }
7343
7344 if (LdVT.isVector() || LdVT.isInteger())
7345 return SDValue();
7346 // Transform a scalar load that is REPLICATEd as well as having other
7347 // use(s) to the form where the other use(s) use the first element of the
7348 // REPLICATE instead of the load. Otherwise instruction selection will not
7349 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
7350 // point loads.
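// For example, a floating-point load feeding both a REPLICATE and a scalar
// use is rewritten so the scalar use reads element 0 of the replicated
// vector, allowing a single VLREP to be selected.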
7351
7352 SDValue Replicate;
7353 SmallVector<SDNode*, 8> OtherUses;
7354 for (SDUse &Use : N->uses()) {
7355 if (Use.getUser()->getOpcode() == SystemZISD::REPLICATE) {
7356 if (Replicate)
7357 return SDValue(); // Should never happen
7358 Replicate = SDValue(Use.getUser(), 0);
7359 } else if (Use.getResNo() == 0)
7360 OtherUses.push_back(Use.getUser());
7361 }
7362 if (!Replicate || OtherUses.empty())
7363 return SDValue();
7364
7365 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
7366 Replicate, DAG.getConstant(0, DL, MVT::i32));
7367 // Update uses of the loaded Value while preserving old chains.
7368 for (SDNode *U : OtherUses) {
7369 SmallVector<SDValue, 8> Ops;
7370 for (SDValue Op : U->ops())
7371 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
7372 DAG.UpdateNodeOperands(U, Ops);
7373 }
7374 return SDValue(N, 0);
7375}
7376
7377bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
7378 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
7379 return true;
7380 if (Subtarget.hasVectorEnhancements2())
7381 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
7382 return true;
7383 return false;
7384}
7385
7386 static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
7387 if (!VT.isVector() || !VT.isSimple() ||
7388 VT.getSizeInBits() != 128 ||
7389 VT.getScalarSizeInBits() % 8 != 0)
7390 return false;
7391
7392 unsigned NumElts = VT.getVectorNumElements();
7393 for (unsigned i = 0; i < NumElts; ++i) {
7394 if (M[i] < 0) continue; // ignore UNDEF indices
7395 if ((unsigned) M[i] != NumElts - 1 - i)
7396 return false;
7397 }
7398
7399 return true;
7400}
7401
7402static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
7403 for (auto *U : StoredVal->users()) {
7404 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
7405 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
7406 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
7407 continue;
7408 } else if (isa<BuildVectorSDNode>(U)) {
7409 SDValue BuildVector = SDValue(U, 0);
7410 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
7411 isOnlyUsedByStores(BuildVector, DAG))
7412 continue;
7413 }
7414 return false;
7415 }
7416 return true;
7417}
7418
7419static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
7420 SDValue &HiPart) {
7421 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
7422 return false;
7423
7424 SDValue Op0 = Val.getOperand(0);
7425 SDValue Op1 = Val.getOperand(1);
7426
7427 if (Op0.getOpcode() == ISD::SHL)
7428 std::swap(Op0, Op1);
7429 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
7430 Op1.getOperand(1).getOpcode() != ISD::Constant ||
7431 Op1.getConstantOperandVal(1) != 64)
7432 return false;
7433 Op1 = Op1.getOperand(0);
7434
7435 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
7436 Op0.getOperand(0).getValueType() != MVT::i64)
7437 return false;
7438 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
7439 Op1.getOperand(0).getValueType() != MVT::i64)
7440 return false;
7441
7442 LoPart = Op0.getOperand(0);
7443 HiPart = Op1.getOperand(0);
7444 return true;
7445}
7446
7447static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
7448 SDValue &HiPart) {
7449 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
7450 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
7451 return false;
7452
7453 if (Val->getNumOperands() != 5 ||
7454 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
7455 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
7456 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
7457 return false;
7458
7459 LoPart = Val->getOperand(1);
7460 HiPart = Val->getOperand(3);
7461 return true;
7462}
7463
7464SDValue SystemZTargetLowering::combineSTORE(
7465 SDNode *N, DAGCombinerInfo &DCI) const {
7466 SelectionDAG &DAG = DCI.DAG;
7467 auto *SN = cast<StoreSDNode>(N);
7468 auto &Op1 = N->getOperand(1);
7469 EVT MemVT = SN->getMemoryVT();
7470
7471 if (SN->getAddressSpace() == SYSTEMZAS::PTR32) {
7472 MVT PtrVT = getPointerTy(DAG.getDataLayout());
7473 MVT StoreNodeVT = SN->getBasePtr().getSimpleValueType();
7474 if (PtrVT != StoreNodeVT) {
7475 SDLoc DL(SN);
7476 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(DL, PtrVT, SN->getBasePtr(),
7477 SYSTEMZAS::PTR32, 0);
7478 return DAG.getStore(SN->getChain(), DL, SN->getValue(), AddrSpaceCast,
7479 SN->getPointerInfo(), SN->getOriginalAlign(),
7480 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7481 }
7482 }
7483
7484 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
7485 // for the extraction to be done on a vMiN value, so that we can use VSTE.
7486 // If X has wider elements then convert it to:
7487 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
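// For example, (truncstorei8 (extract_vector_elt X:v4i32, 1)) is rewritten
// to extract byte 7 of (bitcast X to v16i8), which can then be selected as
// a single VSTEB.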
7488 if (MemVT.isInteger() && SN->isTruncatingStore()) {
7489 if (SDValue Value =
7490 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
7491 DCI.AddToWorklist(Value.getNode());
7492
7493 // Rewrite the store with the new form of stored value.
7494 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
7495 SN->getBasePtr(), SN->getMemoryVT(),
7496 SN->getMemOperand());
7497 }
7498 }
7499 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
7500 if (!SN->isTruncatingStore() &&
7501 Op1.getOpcode() == ISD::BSWAP &&
7502 Op1.getNode()->hasOneUse() &&
7503 canLoadStoreByteSwapped(Op1.getValueType())) {
7504
7505 SDValue BSwapOp = Op1.getOperand(0);
7506
7507 if (BSwapOp.getValueType() == MVT::i16)
7508 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
7509
7510 SDValue Ops[] = {
7511 N->getOperand(0), BSwapOp, N->getOperand(2)
7512 };
7513
7514 return
7515 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
7516 Ops, MemVT, SN->getMemOperand());
7517 }
7518 // Combine STORE (element-swap) into VSTER
7519 if (!SN->isTruncatingStore() &&
7520 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
7521 Op1.getNode()->hasOneUse() &&
7522 Subtarget.hasVectorEnhancements2()) {
7523 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
7524 ArrayRef<int> ShuffleMask = SVN->getMask();
7525 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
7526 SDValue Ops[] = {
7527 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
7528 };
7529
7530 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
7531 DAG.getVTList(MVT::Other),
7532 Ops, MemVT, SN->getMemOperand());
7533 }
7534 }
7535
7536 // Combine STORE (READCYCLECOUNTER) into STCKF.
7537 if (!SN->isTruncatingStore() &&
7538 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
7539 Op1.hasOneUse() &&
7540 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
7541 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
7542 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
7543 DAG.getVTList(MVT::Other),
7544 Ops, MemVT, SN->getMemOperand());
7545 }
7546
7547 // Transform a store of a 128-bit value moved from parts into two stores.
7548 if (SN->isSimple() && ISD::isNormalStore(SN)) {
7549 SDValue LoPart, HiPart;
7550 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
7551 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
7552 SDLoc DL(SN);
7553 SDValue Chain0 =
7554 DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),
7555 SN->getPointerInfo(), SN->getOriginalAlign(),
7556 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7557 SDValue Chain1 =
7558 DAG.getStore(SN->getChain(), DL, LoPart,
7559 DAG.getObjectPtrOffset(DL, SN->getBasePtr(),
7560 TypeSize::getFixed(8)),
7561 SN->getPointerInfo().getWithOffset(8),
7562 SN->getOriginalAlign(),
7563 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7564
7565 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
7566 }
7567 }
7568
7569 // Replicate a reg or immediate with VREP instead of scalar multiply or
7570 // immediate load. It seems best to do this during the first DAGCombine as
7571 // it is straightforward to handle the zero-extend node in the initial
7572 // DAG, and we also need not worry about keeping the new MemVT legal (e.g.
7573 // when extracting an i16 element from a v16i8 vector).
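// For example, a store of the i64 immediate 0x0001000100010001 can be
// emitted as a halfword replicate-immediate (VREPIH of 1) followed by a
// vector store, rather than materializing the full scalar constant.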
7574 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
7575 isOnlyUsedByStores(Op1, DAG)) {
7576 SDValue Word = SDValue();
7577 EVT WordVT;
7578
7579 // Find a replicated immediate and return it if found in Word and its
7580 // type in WordVT.
7581 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
7582 // Some constants are better handled with a scalar store.
7583 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
7584 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
7585 return;
7586
7587 APInt Val = C->getAPIntValue();
7588 // Truncate Val in case of a truncating store.
7589 if (!llvm::isUIntN(TotBytes * 8, Val.getZExtValue())) {
7590 assert(SN->isTruncatingStore() &&
7591 "Non-truncating store and immediate value does not fit?");
7592 Val = Val.trunc(TotBytes * 8);
7593 }
7594
7595 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, Val.getZExtValue()));
7596 if (VCI.isVectorConstantLegal(Subtarget) &&
7597 VCI.Opcode == SystemZISD::REPLICATE) {
7598 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
7599 WordVT = VCI.VecVT.getScalarType();
7600 }
7601 };
7602
7603 // Find a replicated register and return it if found in Word and its type
7604 // in WordVT.
7605 auto FindReplicatedReg = [&](SDValue MulOp) {
7606 EVT MulVT = MulOp.getValueType();
7607 if (MulOp->getOpcode() == ISD::MUL &&
7608 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
7609 // Find a zero extended value and its type.
7610 SDValue LHS = MulOp->getOperand(0);
7611 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
7612 WordVT = LHS->getOperand(0).getValueType();
7613 else if (LHS->getOpcode() == ISD::AssertZext)
7614 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
7615 else
7616 return;
7617 // Find a replicating constant, e.g. 0x00010001.
7618 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
7619 SystemZVectorConstantInfo VCI(
7620 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
7621 if (VCI.isVectorConstantLegal(Subtarget) &&
7622 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
7623 WordVT == VCI.VecVT.getScalarType())
7624 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
7625 }
7626 }
7627 };
7628
7629 if (isa<BuildVectorSDNode>(Op1) &&
7630 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
7631 SDValue SplatVal = Op1->getOperand(0);
7632 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
7633 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
7634 else
7635 FindReplicatedReg(SplatVal);
7636 } else {
7637 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
7638 FindReplicatedImm(C, MemVT.getStoreSize());
7639 else
7640 FindReplicatedReg(Op1);
7641 }
7642
7643 if (Word != SDValue()) {
7644 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
7645 "Bad type handling");
7646 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
7647 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
7648 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
7649 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
7650 SN->getBasePtr(), SN->getMemOperand());
7651 }
7652 }
7653
7654 return SDValue();
7655}
7656
7657SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
7658 SDNode *N, DAGCombinerInfo &DCI) const {
7659 SelectionDAG &DAG = DCI.DAG;
7660 // Combine element-swap (LOAD) into VLER
7661 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7662 N->getOperand(0).hasOneUse() &&
7663 Subtarget.hasVectorEnhancements2()) {
7664 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
7665 ArrayRef<int> ShuffleMask = SVN->getMask();
7666 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
7667 SDValue Load = N->getOperand(0);
7668 LoadSDNode *LD = cast<LoadSDNode>(Load);
7669
7670 // Create the element-swapping load.
7671 SDValue Ops[] = {
7672 LD->getChain(), // Chain
7673 LD->getBasePtr() // Ptr
7674 };
7675 SDValue ESLoad =
7676 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
7677 DAG.getVTList(LD->getValueType(0), MVT::Other),
7678 Ops, LD->getMemoryVT(), LD->getMemOperand());
7679
7680 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
7681 // by the load dead.
7682 DCI.CombineTo(N, ESLoad);
7683
7684 // Next, combine the load away, we give it a bogus result value but a real
7685 // chain result. The result value is dead because the shuffle is dead.
7686 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
7687
7688 // Return N so it doesn't get rechecked!
7689 return SDValue(N, 0);
7690 }
7691 }
7692
7693 return SDValue();
7694}
7695
7696SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
7697 SDNode *N, DAGCombinerInfo &DCI) const {
7698 SelectionDAG &DAG = DCI.DAG;
7699
7700 if (!Subtarget.hasVector())
7701 return SDValue();
7702
7703 // Look through bitcasts that retain the number of vector elements.
7704 SDValue Op = N->getOperand(0);
7705 if (Op.getOpcode() == ISD::BITCAST &&
7706 Op.getValueType().isVector() &&
7707 Op.getOperand(0).getValueType().isVector() &&
7708 Op.getValueType().getVectorNumElements() ==
7709 Op.getOperand(0).getValueType().getVectorNumElements())
7710 Op = Op.getOperand(0);
7711
7712 // Pull BSWAP out of a vector extraction.
7713 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
7714 EVT VecVT = Op.getValueType();
7715 EVT EltVT = VecVT.getVectorElementType();
7716 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
7717 Op.getOperand(0), N->getOperand(1));
7718 DCI.AddToWorklist(Op.getNode());
7719 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
7720 if (EltVT != N->getValueType(0)) {
7721 DCI.AddToWorklist(Op.getNode());
7722 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
7723 }
7724 return Op;
7725 }
7726
7727 // Try to simplify a vector extraction.
7728 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
7729 SDValue Op0 = N->getOperand(0);
7730 EVT VecVT = Op0.getValueType();
7731 if (canTreatAsByteVector(VecVT))
7732 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
7733 IndexN->getZExtValue(), DCI, false);
7734 }
7735 return SDValue();
7736}
7737
7738SDValue SystemZTargetLowering::combineJOIN_DWORDS(
7739 SDNode *N, DAGCombinerInfo &DCI) const {
7740 SelectionDAG &DAG = DCI.DAG;
7741 // (join_dwords X, X) == (replicate X)
7742 if (N->getOperand(0) == N->getOperand(1))
7743 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
7744 N->getOperand(0));
7745 return SDValue();
7746}
7747
7748 static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
7749 SDValue Chain1 = N1->getOperand(0);
7750 SDValue Chain2 = N2->getOperand(0);
7751
7752 // Trivial case: both nodes take the same chain.
7753 if (Chain1 == Chain2)
7754 return Chain1;
7755
7756 // FIXME - we could handle more complex cases via TokenFactor,
7757 // assuming we can verify that this would not create a cycle.
7758 return SDValue();
7759}
7760
7761SDValue SystemZTargetLowering::combineFP_ROUND(
7762 SDNode *N, DAGCombinerInfo &DCI) const {
7763
7764 if (!Subtarget.hasVector())
7765 return SDValue();
7766
7767 // (fpround (extract_vector_elt X 0))
7768 // (fpround (extract_vector_elt X 1)) ->
7769 // (extract_vector_elt (VROUND X) 0)
7770 // (extract_vector_elt (VROUND X) 2)
7771 //
7772 // This is a special case since the target doesn't really support v2f32s.
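// The second rounded result is read from element 2 because VROUND (VLEDB)
// places its two f32 results in the even-numbered elements of the v4f32
// output.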
7773 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7774 SelectionDAG &DAG = DCI.DAG;
7775 SDValue Op0 = N->getOperand(OpNo);
7776 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
7777 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7778 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
7779 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7780 Op0.getConstantOperandVal(1) == 0) {
7781 SDValue Vec = Op0.getOperand(0);
7782 for (auto *U : Vec->users()) {
7783 if (U != Op0.getNode() && U->hasOneUse() &&
7784 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7785 U->getOperand(0) == Vec &&
7786 U->getOperand(1).getOpcode() == ISD::Constant &&
7787 U->getConstantOperandVal(1) == 1) {
7788 SDValue OtherRound = SDValue(*U->user_begin(), 0);
7789 if (OtherRound.getOpcode() == N->getOpcode() &&
7790 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
7791 OtherRound.getValueType() == MVT::f32) {
7792 SDValue VRound, Chain;
7793 if (N->isStrictFPOpcode()) {
7794 Chain = MergeInputChains(N, OtherRound.getNode());
7795 if (!Chain)
7796 continue;
7797 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
7798 {MVT::v4f32, MVT::Other}, {Chain, Vec});
7799 Chain = VRound.getValue(1);
7800 } else
7801 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
7802 MVT::v4f32, Vec);
7803 DCI.AddToWorklist(VRound.getNode());
7804 SDValue Extract1 =
7805 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
7806 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
7807 DCI.AddToWorklist(Extract1.getNode());
7808 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
7809 if (Chain)
7810 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
7811 SDValue Extract0 =
7812 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
7813 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7814 if (Chain)
7815 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7816 N->getVTList(), Extract0, Chain);
7817 return Extract0;
7818 }
7819 }
7820 }
7821 }
7822 return SDValue();
7823}
7824
7825SDValue SystemZTargetLowering::combineFP_EXTEND(
7826 SDNode *N, DAGCombinerInfo &DCI) const {
7827
7828 if (!Subtarget.hasVector())
7829 return SDValue();
7830
7831 // (fpextend (extract_vector_elt X 0))
7832 // (fpextend (extract_vector_elt X 2)) ->
7833 // (extract_vector_elt (VEXTEND X) 0)
7834 // (extract_vector_elt (VEXTEND X) 1)
7835 //
7836 // This is a special case since the target doesn't really support v2f32s.
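// The source f32 values sit in elements 0 and 2 because VEXTEND (VLDEB)
// widens the even-numbered elements of its v4f32 input into the two f64
// results.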
7837 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7838 SelectionDAG &DAG = DCI.DAG;
7839 SDValue Op0 = N->getOperand(OpNo);
7840 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
7841 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7842 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
7843 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7844 Op0.getConstantOperandVal(1) == 0) {
7845 SDValue Vec = Op0.getOperand(0);
7846 for (auto *U : Vec->users()) {
7847 if (U != Op0.getNode() && U->hasOneUse() &&
7848 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7849 U->getOperand(0) == Vec &&
7850 U->getOperand(1).getOpcode() == ISD::Constant &&
7851 U->getConstantOperandVal(1) == 2) {
7852 SDValue OtherExtend = SDValue(*U->user_begin(), 0);
7853 if (OtherExtend.getOpcode() == N->getOpcode() &&
7854 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
7855 OtherExtend.getValueType() == MVT::f64) {
7856 SDValue VExtend, Chain;
7857 if (N->isStrictFPOpcode()) {
7858 Chain = MergeInputChains(N, OtherExtend.getNode());
7859 if (!Chain)
7860 continue;
7861 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
7862 {MVT::v2f64, MVT::Other}, {Chain, Vec});
7863 Chain = VExtend.getValue(1);
7864 } else
7865 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
7866 MVT::v2f64, Vec);
7867 DCI.AddToWorklist(VExtend.getNode());
7868 SDValue Extract1 =
7869 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
7870 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
7871 DCI.AddToWorklist(Extract1.getNode());
7872 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
7873 if (Chain)
7874 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
7875 SDValue Extract0 =
7876 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
7877 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7878 if (Chain)
7879 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7880 N->getVTList(), Extract0, Chain);
7881 return Extract0;
7882 }
7883 }
7884 }
7885 }
7886 return SDValue();
7887}
7888
7889SDValue SystemZTargetLowering::combineINT_TO_FP(
7890 SDNode *N, DAGCombinerInfo &DCI) const {
7891 if (DCI.Level != BeforeLegalizeTypes)
7892 return SDValue();
7893 SelectionDAG &DAG = DCI.DAG;
7894 LLVMContext &Ctx = *DAG.getContext();
7895 unsigned Opcode = N->getOpcode();
7896 EVT OutVT = N->getValueType(0);
7897 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
7898 SDValue Op = N->getOperand(0);
7899 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
7900 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
7901
7902 // Insert an extension before type-legalization to avoid scalarization, e.g.:
7903 // v2f64 = uint_to_fp v2i16
7904 // =>
7905 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
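// The extension is chosen to match the conversion's signedness:
// ZERO_EXTEND for uint_to_fp and SIGN_EXTEND for sint_to_fp.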
7906 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
7907 OutScalarBits <= 64) {
7908 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
7909 EVT ExtVT = EVT::getVectorVT(
7910 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
7911 unsigned ExtOpcode =
7912 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
7913 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
7914 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
7915 }
7916 return SDValue();
7917}
7918
7919SDValue SystemZTargetLowering::combineBSWAP(
7920 SDNode *N, DAGCombinerInfo &DCI) const {
7921 SelectionDAG &DAG = DCI.DAG;
7922 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
7923 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7924 N->getOperand(0).hasOneUse() &&
7925 canLoadStoreByteSwapped(N->getValueType(0))) {
7926 SDValue Load = N->getOperand(0);
7927 LoadSDNode *LD = cast<LoadSDNode>(Load);
7928
7929 // Create the byte-swapping load.
7930 SDValue Ops[] = {
7931 LD->getChain(), // Chain
7932 LD->getBasePtr() // Ptr
7933 };
7934 EVT LoadVT = N->getValueType(0);
7935 if (LoadVT == MVT::i16)
7936 LoadVT = MVT::i32;
7937 SDValue BSLoad =
7938 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
7939 DAG.getVTList(LoadVT, MVT::Other),
7940 Ops, LD->getMemoryVT(), LD->getMemOperand());
7941
7942 // If this is an i16 load, insert the truncate.
7943 SDValue ResVal = BSLoad;
7944 if (N->getValueType(0) == MVT::i16)
7945 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
7946
7947 // First, combine the bswap away. This makes the value produced by the
7948 // load dead.
7949 DCI.CombineTo(N, ResVal);
7950
7951 // Next, combine the load away, we give it a bogus result value but a real
7952 // chain result. The result value is dead because the bswap is dead.
7953 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
7954
7955 // Return N so it doesn't get rechecked!
7956 return SDValue(N, 0);
7957 }
7958
7959 // Look through bitcasts that retain the number of vector elements.
7960 SDValue Op = N->getOperand(0);
7961 if (Op.getOpcode() == ISD::BITCAST &&
7962 Op.getValueType().isVector() &&
7963 Op.getOperand(0).getValueType().isVector() &&
7964 Op.getValueType().getVectorNumElements() ==
7965 Op.getOperand(0).getValueType().getVectorNumElements())
7966 Op = Op.getOperand(0);
7967
7968 // Push BSWAP into a vector insertion if at least one side then simplifies.
7969 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
7970 SDValue Vec = Op.getOperand(0);
7971 SDValue Elt = Op.getOperand(1);
7972 SDValue Idx = Op.getOperand(2);
7973
7974 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
7975 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
7976 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
7977 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
7978 (canLoadStoreByteSwapped(N->getValueType(0)) &&
7979 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
7980 EVT VecVT = N->getValueType(0);
7981 EVT EltVT = N->getValueType(0).getVectorElementType();
7982 if (VecVT != Vec.getValueType()) {
7983 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
7984 DCI.AddToWorklist(Vec.getNode());
7985 }
7986 if (EltVT != Elt.getValueType()) {
7987 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
7988 DCI.AddToWorklist(Elt.getNode());
7989 }
7990 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
7991 DCI.AddToWorklist(Vec.getNode());
7992 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
7993 DCI.AddToWorklist(Elt.getNode());
7994 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
7995 Vec, Elt, Idx);
7996 }
7997 }
7998
7999 // Push BSWAP into a vector shuffle if at least one side then simplifies.
8000 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
8001 if (SV && Op.hasOneUse()) {
8002 SDValue Op0 = Op.getOperand(0);
8003 SDValue Op1 = Op.getOperand(1);
8004
8005 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
8006 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
8007 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
8008 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
8009 EVT VecVT = N->getValueType(0);
8010 if (VecVT != Op0.getValueType()) {
8011 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
8012 DCI.AddToWorklist(Op0.getNode());
8013 }
8014 if (VecVT != Op1.getValueType()) {
8015 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
8016 DCI.AddToWorklist(Op1.getNode());
8017 }
8018 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
8019 DCI.AddToWorklist(Op0.getNode());
8020 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
8021 DCI.AddToWorklist(Op1.getNode());
8022 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
8023 }
8024 }
8025
8026 return SDValue();
8027}
8028
8029static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
8030 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
8031 // set by the CCReg instruction using the CCValid / CCMask masks.
8032 // If the CCReg instruction is itself an ICMP testing the condition
8033 // code set by some other instruction, see whether we can directly
8034 // use that condition code.
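// A typical case: an ICMP that tests the constant result of a SELECT_CCMASK
// against one of its two constants is redundant, and the branch or select
// can test the original condition code directly.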
8035
8036 // Verify that we have an ICMP against some constant.
8037 if (CCValid != SystemZ::CCMASK_ICMP)
8038 return false;
8039 auto *ICmp = CCReg.getNode();
8040 if (ICmp->getOpcode() != SystemZISD::ICMP)
8041 return false;
8042 auto *CompareLHS = ICmp->getOperand(0).getNode();
8043 auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
8044 if (!CompareRHS)
8045 return false;
8046
8047 // Optimize the case where CompareLHS is a SELECT_CCMASK.
8048 if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
8049 // Verify that we have an appropriate mask for an EQ or NE comparison.
8050 bool Invert = false;
8051 if (CCMask == SystemZ::CCMASK_CMP_NE)
8052 Invert = !Invert;
8053 else if (CCMask != SystemZ::CCMASK_CMP_EQ)
8054 return false;
8055
8056 // Verify that the ICMP compares against one of the select values.
8057 auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
8058 if (!TrueVal)
8059 return false;
8060 auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
8061 if (!FalseVal)
8062 return false;
8063 if (CompareRHS->getAPIntValue() == FalseVal->getAPIntValue())
8064 Invert = !Invert;
8065 else if (CompareRHS->getAPIntValue() != TrueVal->getAPIntValue())
8066 return false;
8067
8068 // Compute the effective CC mask for the new branch or select.
8069 auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
8070 auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
8071 if (!NewCCValid || !NewCCMask)
8072 return false;
8073 CCValid = NewCCValid->getZExtValue();
8074 CCMask = NewCCMask->getZExtValue();
8075 if (Invert)
8076 CCMask ^= CCValid;
8077
8078 // Return the updated CCReg link.
8079 CCReg = CompareLHS->getOperand(4);
8080 return true;
8081 }
8082
8083 // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
8084 if (CompareLHS->getOpcode() == ISD::SRA) {
8085 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
8086 if (!SRACount || SRACount->getZExtValue() != 30)
8087 return false;
8088 auto *SHL = CompareLHS->getOperand(0).getNode();
8089 if (SHL->getOpcode() != ISD::SHL)
8090 return false;
8091 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
8092 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
8093 return false;
8094 auto *IPM = SHL->getOperand(0).getNode();
8095 if (IPM->getOpcode() != SystemZISD::IPM)
8096 return false;
8097
8098 // Avoid introducing CC spills (because SRA would clobber CC).
8099 if (!CompareLHS->hasOneUse())
8100 return false;
8101 // Verify that the ICMP compares against zero.
8102 if (CompareRHS->getZExtValue() != 0)
8103 return false;
8104
8105 // Compute the effective CC mask for the new branch or select.
8106 CCMask = SystemZ::reverseCCMask(CCMask);
8107
8108 // Return the updated CCReg link.
8109 CCReg = IPM->getOperand(0);
8110 return true;
8111 }
8112
8113 return false;
8114}
8115
8116SDValue SystemZTargetLowering::combineBR_CCMASK(
8117 SDNode *N, DAGCombinerInfo &DCI) const {
8118 SelectionDAG &DAG = DCI.DAG;
8119
8120 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
8121 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
8122 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
8123 if (!CCValid || !CCMask)
8124 return SDValue();
8125
8126 int CCValidVal = CCValid->getZExtValue();
8127 int CCMaskVal = CCMask->getZExtValue();
8128 SDValue Chain = N->getOperand(0);
8129 SDValue CCReg = N->getOperand(4);
8130
8131 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
8132 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
8133 Chain,
8134 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
8135 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
8136 N->getOperand(3), CCReg);
8137 return SDValue();
8138}
8139
8140SDValue SystemZTargetLowering::combineSELECT_CCMASK(
8141 SDNode *N, DAGCombinerInfo &DCI) const {
8142 SelectionDAG &DAG = DCI.DAG;
8143
8144 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
8145 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
8146 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
8147 if (!CCValid || !CCMask)
8148 return SDValue();
8149
8150 int CCValidVal = CCValid->getZExtValue();
8151 int CCMaskVal = CCMask->getZExtValue();
8152 SDValue CCReg = N->getOperand(4);
8153
8154 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
8155 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
8156 N->getOperand(0), N->getOperand(1),
8157 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
8158 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
8159 CCReg);
8160 return SDValue();
8161}
8162
8163
8164SDValue SystemZTargetLowering::combineGET_CCMASK(
8165 SDNode *N, DAGCombinerInfo &DCI) const {
8166
8167 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
8168 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
8169 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
8170 if (!CCValid || !CCMask)
8171 return SDValue();
8172 int CCValidVal = CCValid->getZExtValue();
8173 int CCMaskVal = CCMask->getZExtValue();
8174
8175 SDValue Select = N->getOperand(0);
8176 if (Select->getOpcode() == ISD::TRUNCATE)
8177 Select = Select->getOperand(0);
8178 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
8179 return SDValue();
8180
8181 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
8182 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
8183 if (!SelectCCValid || !SelectCCMask)
8184 return SDValue();
8185 int SelectCCValidVal = SelectCCValid->getZExtValue();
8186 int SelectCCMaskVal = SelectCCMask->getZExtValue();
8187
8188 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
8189 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
8190 if (!TrueVal || !FalseVal)
8191 return SDValue();
8192 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
8193 ;
8194 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
8195 SelectCCMaskVal ^= SelectCCValidVal;
8196 else
8197 return SDValue();
8198
8199 if (SelectCCValidVal & ~CCValidVal)
8200 return SDValue();
8201 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
8202 return SDValue();
8203
8204 return Select->getOperand(4);
8205}
8206
8207SDValue SystemZTargetLowering::combineIntDIVREM(
8208 SDNode *N, DAGCombinerInfo &DCI) const {
8209 SelectionDAG &DAG = DCI.DAG;
8210 EVT VT = N->getValueType(0);
8211 // In the case where the divisor is a vector of constants, a cheaper
8212 // sequence of instructions can replace the divide. BuildSDIV is called to
8213 // do this during DAG combining, but it only succeeds when it can build a
8214 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
8215 // since it is not Legal but Custom it can only happen before
8216 // legalization. Therefore we must scalarize this early before Combine
8217 // 1. For widened vectors, this is already the result of type legalization.
8218 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
8219 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
8220 return DAG.UnrollVectorOp(N);
8221 return SDValue();
8222}
8223
8224SDValue SystemZTargetLowering::combineINTRINSIC(
8225 SDNode *N, DAGCombinerInfo &DCI) const {
8226 SelectionDAG &DAG = DCI.DAG;
8227
8228 unsigned Id = N->getConstantOperandVal(1);
8229 switch (Id) {
8230 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
8231 // or larger is simply a vector load.
8232 case Intrinsic::s390_vll:
8233 case Intrinsic::s390_vlrl:
8234 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
8235 if (C->getZExtValue() >= 15)
8236 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
8237 N->getOperand(3), MachinePointerInfo());
8238 break;
8239 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
8240 case Intrinsic::s390_vstl:
8241 case Intrinsic::s390_vstrl:
8242 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
8243 if (C->getZExtValue() >= 15)
8244 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
8245 N->getOperand(4), MachinePointerInfo());
8246 break;
8247 }
8248
8249 return SDValue();
8250}
8251
8252SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
8253 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
8254 return N->getOperand(0);
8255 return N;
8256}
8257
8258 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
8259 DAGCombinerInfo &DCI) const {
8260 switch(N->getOpcode()) {
8261 default: break;
8262 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
8263 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
8264 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
8265 case SystemZISD::MERGE_HIGH:
8266 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
8267 case ISD::LOAD: return combineLOAD(N, DCI);
8268 case ISD::STORE: return combineSTORE(N, DCI);
8269 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
8270 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
8271 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
8272 case ISD::STRICT_FP_ROUND:
8273 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
8274 case ISD::STRICT_FP_EXTEND:
8275 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
8276 case ISD::SINT_TO_FP:
8277 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
8278 case ISD::BSWAP: return combineBSWAP(N, DCI);
8279 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
8280 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
8281 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
8282 case ISD::SDIV:
8283 case ISD::UDIV:
8284 case ISD::SREM:
8285 case ISD::UREM: return combineIntDIVREM(N, DCI);
8286 case ISD::INTRINSIC_W_CHAIN:
8287 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
8288 }
8289
8290 return SDValue();
8291}
8292
8293// Return the demanded elements for the OpNo source operand of Op. DemandedElts
8294// are for Op.
8295static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
8296 unsigned OpNo) {
8297 EVT VT = Op.getValueType();
8298 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
8299 APInt SrcDemE;
8300 unsigned Opcode = Op.getOpcode();
8301 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
8302 unsigned Id = Op.getConstantOperandVal(0);
8303 switch (Id) {
8304 case Intrinsic::s390_vpksh: // PACKS
8305 case Intrinsic::s390_vpksf:
8306 case Intrinsic::s390_vpksg:
8307 case Intrinsic::s390_vpkshs: // PACKS_CC
8308 case Intrinsic::s390_vpksfs:
8309 case Intrinsic::s390_vpksgs:
8310 case Intrinsic::s390_vpklsh: // PACKLS
8311 case Intrinsic::s390_vpklsf:
8312 case Intrinsic::s390_vpklsg:
8313 case Intrinsic::s390_vpklshs: // PACKLS_CC
8314 case Intrinsic::s390_vpklsfs:
8315 case Intrinsic::s390_vpklsgs:
8316 // VECTOR PACK truncates the elements of two source vectors into one.
8317 SrcDemE = DemandedElts;
8318 if (OpNo == 2)
8319 SrcDemE.lshrInPlace(NumElts / 2);
8320 SrcDemE = SrcDemE.trunc(NumElts / 2);
8321 break;
8322 // VECTOR UNPACK extends half the elements of the source vector.
8323 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8324 case Intrinsic::s390_vuphh:
8325 case Intrinsic::s390_vuphf:
8326 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
8327 case Intrinsic::s390_vuplhh:
8328 case Intrinsic::s390_vuplhf:
8329 SrcDemE = APInt(NumElts * 2, 0);
8330 SrcDemE.insertBits(DemandedElts, 0);
8331 break;
8332 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8333 case Intrinsic::s390_vuplhw:
8334 case Intrinsic::s390_vuplf:
8335 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
8336 case Intrinsic::s390_vupllh:
8337 case Intrinsic::s390_vupllf:
8338 SrcDemE = APInt(NumElts * 2, 0);
8339 SrcDemE.insertBits(DemandedElts, NumElts);
8340 break;
8341 case Intrinsic::s390_vpdi: {
8342 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
8343 SrcDemE = APInt(NumElts, 0);
8344 if (!DemandedElts[OpNo - 1])
8345 break;
8346 unsigned Mask = Op.getConstantOperandVal(3);
8347 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
8348 // Demand input element 0 or 1, given by the mask bit value.
8349 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
8350 break;
8351 }
8352 case Intrinsic::s390_vsldb: {
8353 // VECTOR SHIFT LEFT DOUBLE BY BYTE
8354 assert(VT == MVT::v16i8 && "Unexpected type.");
8355 unsigned FirstIdx = Op.getConstantOperandVal(3);
8356 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
8357 unsigned NumSrc0Els = 16 - FirstIdx;
8358 SrcDemE = APInt(NumElts, 0);
8359 if (OpNo == 1) {
8360 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
8361 SrcDemE.insertBits(DemEls, FirstIdx);
8362 } else {
8363 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
8364 SrcDemE.insertBits(DemEls, 0);
8365 }
8366 break;
8367 }
8368 case Intrinsic::s390_vperm:
8369 SrcDemE = APInt::getAllOnes(NumElts);
8370 break;
8371 default:
8372 llvm_unreachable("Unhandled intrinsic.");
8373 break;
8374 }
8375 } else {
8376 switch (Opcode) {
 8377 case SystemZISD::JOIN_DWORDS:
 8378 // Scalar operand.
8379 SrcDemE = APInt(1, 1);
8380 break;
 8381 case SystemZISD::SELECT_CCMASK:
 8382 SrcDemE = DemandedElts;
8383 break;
8384 default:
8385 llvm_unreachable("Unhandled opcode.");
8386 break;
8387 }
8388 }
8389 return SrcDemE;
8390}
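// Worked example: for a v16i8 PACKS result where only elements 4..11 are
// demanded (DemandedElts = 0x0ff0), operand 1 (a v8i16) is asked for its
// elements 4..7 (SrcDemE = 0xf0) and operand 2 for its elements 0..3
// (SrcDemE = 0x0f), since VECTOR PACK concatenates the truncated elements
// of operand 1 followed by those of operand 2.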
8391
8392static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
8393 const APInt &DemandedElts,
8394 const SelectionDAG &DAG, unsigned Depth,
8395 unsigned OpNo) {
8396 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
8397 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
8398 KnownBits LHSKnown =
8399 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
8400 KnownBits RHSKnown =
8401 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
8402 Known = LHSKnown.intersectWith(RHSKnown);
8403}
8404
8405void
8406SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
 8407 KnownBits &Known,
8408 const APInt &DemandedElts,
8409 const SelectionDAG &DAG,
8410 unsigned Depth) const {
8411 Known.resetAll();
8412
8413 // Intrinsic CC result is returned in the two low bits.
8414 unsigned tmp0, tmp1; // not used
8415 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
8416 Known.Zero.setBitsFrom(2);
8417 return;
8418 }
8419 EVT VT = Op.getValueType();
8420 if (Op.getResNo() != 0 || VT == MVT::Untyped)
8421 return;
8422 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
8423 "KnownBits does not match VT in bitwidth");
8424 assert ((!VT.isVector() ||
8425 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
8426 "DemandedElts does not match VT number of elements");
8427 unsigned BitWidth = Known.getBitWidth();
8428 unsigned Opcode = Op.getOpcode();
8429 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
8430 bool IsLogical = false;
8431 unsigned Id = Op.getConstantOperandVal(0);
8432 switch (Id) {
8433 case Intrinsic::s390_vpksh: // PACKS
8434 case Intrinsic::s390_vpksf:
8435 case Intrinsic::s390_vpksg:
8436 case Intrinsic::s390_vpkshs: // PACKS_CC
8437 case Intrinsic::s390_vpksfs:
8438 case Intrinsic::s390_vpksgs:
8439 case Intrinsic::s390_vpklsh: // PACKLS
8440 case Intrinsic::s390_vpklsf:
8441 case Intrinsic::s390_vpklsg:
8442 case Intrinsic::s390_vpklshs: // PACKLS_CC
8443 case Intrinsic::s390_vpklsfs:
8444 case Intrinsic::s390_vpklsgs:
8445 case Intrinsic::s390_vpdi:
8446 case Intrinsic::s390_vsldb:
8447 case Intrinsic::s390_vperm:
8448 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
8449 break;
8450 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
8451 case Intrinsic::s390_vuplhh:
8452 case Intrinsic::s390_vuplhf:
8453 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
8454 case Intrinsic::s390_vupllh:
8455 case Intrinsic::s390_vupllf:
8456 IsLogical = true;
8457 [[fallthrough]];
8458 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8459 case Intrinsic::s390_vuphh:
8460 case Intrinsic::s390_vuphf:
8461 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8462 case Intrinsic::s390_vuplhw:
8463 case Intrinsic::s390_vuplf: {
8464 SDValue SrcOp = Op.getOperand(1);
8465 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
8466 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
8467 if (IsLogical) {
8468 Known = Known.zext(BitWidth);
8469 } else
8470 Known = Known.sext(BitWidth);
8471 break;
8472 }
8473 default:
8474 break;
8475 }
8476 } else {
8477 switch (Opcode) {
 8478 case SystemZISD::JOIN_DWORDS:
 8479 case SystemZISD::SELECT_CCMASK:
 8480 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
8481 break;
8482 case SystemZISD::REPLICATE: {
8483 SDValue SrcOp = Op.getOperand(0);
8484 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
8485 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
 8486 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
8487 break;
8488 }
8489 default:
8490 break;
8491 }
8492 }
8493
8494 // Known has the width of the source operand(s). Adjust if needed to match
8495 // the passed bitwidth.
8496 if (Known.getBitWidth() != BitWidth)
8497 Known = Known.anyextOrTrunc(BitWidth);
8498}
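// For example, a logical unpack such as s390_vuplhb (v16i8 -> v8i16)
// zero-extends each source byte, so the zext above marks bits 8..15 of every
// demanded result element as known zero; the arithmetic unpacks use sext
// instead, so those bits are only known when the source byte's sign bit is
// known.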
8499
8500static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
8501 const SelectionDAG &DAG, unsigned Depth,
8502 unsigned OpNo) {
8503 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
8504 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
8505 if (LHS == 1) return 1; // Early out.
8506 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
8507 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
8508 if (RHS == 1) return 1; // Early out.
8509 unsigned Common = std::min(LHS, RHS);
8510 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
8511 EVT VT = Op.getValueType();
8512 unsigned VTBits = VT.getScalarSizeInBits();
8513 if (SrcBitWidth > VTBits) { // PACK
8514 unsigned SrcExtraBits = SrcBitWidth - VTBits;
8515 if (Common > SrcExtraBits)
8516 return (Common - SrcExtraBits);
8517 return 1;
8518 }
8519 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
8520 return Common;
8521}
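// PACK example: the sources are twice as wide as the result (SrcBitWidth 16
// vs. VTBits 8 gives SrcExtraBits = 8), so a common count of 12 sign bits in
// the i16 sources leaves 12 - 8 = 4 sign bits in each packed i8 element, and
// anything not exceeding the 8 truncated bits falls back to the conservative
// answer of 1.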
8522
8523unsigned
8524SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
 8525 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
8526 unsigned Depth) const {
8527 if (Op.getResNo() != 0)
8528 return 1;
8529 unsigned Opcode = Op.getOpcode();
8530 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
8531 unsigned Id = Op.getConstantOperandVal(0);
8532 switch (Id) {
8533 case Intrinsic::s390_vpksh: // PACKS
8534 case Intrinsic::s390_vpksf:
8535 case Intrinsic::s390_vpksg:
8536 case Intrinsic::s390_vpkshs: // PACKS_CC
8537 case Intrinsic::s390_vpksfs:
8538 case Intrinsic::s390_vpksgs:
8539 case Intrinsic::s390_vpklsh: // PACKLS
8540 case Intrinsic::s390_vpklsf:
8541 case Intrinsic::s390_vpklsg:
8542 case Intrinsic::s390_vpklshs: // PACKLS_CC
8543 case Intrinsic::s390_vpklsfs:
8544 case Intrinsic::s390_vpklsgs:
8545 case Intrinsic::s390_vpdi:
8546 case Intrinsic::s390_vsldb:
8547 case Intrinsic::s390_vperm:
8548 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
8549 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8550 case Intrinsic::s390_vuphh:
8551 case Intrinsic::s390_vuphf:
8552 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8553 case Intrinsic::s390_vuplhw:
8554 case Intrinsic::s390_vuplf: {
8555 SDValue PackedOp = Op.getOperand(1);
8556 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
8557 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
8558 EVT VT = Op.getValueType();
8559 unsigned VTBits = VT.getScalarSizeInBits();
8560 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
8561 return Tmp;
8562 }
8563 default:
8564 break;
8565 }
8566 } else {
8567 switch (Opcode) {
 8568 case SystemZISD::SELECT_CCMASK:
 8569 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
8570 default:
8571 break;
8572 }
8573 }
8574
8575 return 1;
8576}
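// For the sign-extending unpacks the result simply gains the widening bits:
// e.g. unpacking i8 elements to i16 adds 16 - 8 = 8 to whatever
// ComputeNumSignBits reports for the demanded source bytes.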
8577
8578bool
8579SystemZTargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op,
 8580 const APInt &DemandedElts, const SelectionDAG &DAG,
8581 bool PoisonOnly, unsigned Depth) const {
8582 switch (Op->getOpcode()) {
 8583 case SystemZISD::PCREL_WRAPPER:
 8584 case SystemZISD::PCREL_OFFSET:
 8585 return true;
8586 }
8587 return false;
8588}
8589
8590unsigned
8591SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
 8592 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
8593 unsigned StackAlign = TFI->getStackAlignment();
 8594 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
8595 "Unexpected stack alignment");
8596 // The default stack probe size is 4096 if the function has no
8597 // stack-probe-size attribute.
8598 unsigned StackProbeSize =
8599 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
8600 // Round down to the stack alignment.
8601 StackProbeSize &= ~(StackAlign - 1);
8602 return StackProbeSize ? StackProbeSize : StackAlign;
8603}
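// For example, "stack-probe-size"=4095 with the usual 8-byte stack alignment
// rounds down to 4088, and a value smaller than the alignment rounds down to
// 0, in which case the alignment itself (8) is used as the probe size.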
8604
8605//===----------------------------------------------------------------------===//
8606// Custom insertion
8607//===----------------------------------------------------------------------===//
8608
8609// Force base value Base into a register before MI. Return the register.
8610static Register forceReg(MachineInstr &MI, MachineOperand &Base,
 8611 const SystemZInstrInfo *TII) {
8612 MachineBasicBlock *MBB = MI.getParent();
8613 MachineFunction &MF = *MBB->getParent();
 8614 MachineRegisterInfo &MRI = MF.getRegInfo();
 8615
8616 if (Base.isReg()) {
8617 // Copy Base into a new virtual register to help register coalescing in
8618 // cases with multiple uses.
8619 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8620 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
8621 .add(Base);
8622 return Reg;
8623 }
8624
8625 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8626 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
8627 .add(Base)
8628 .addImm(0)
8629 .addReg(0);
8630 return Reg;
8631}
8632
8633// The CC operand of MI might be missing a kill marker because there
8634// were multiple uses of CC, and ISel didn't know which to mark.
8635// Figure out whether MI should have had a kill marker.
8636static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
 8637 // Scan forward through BB for a use/def of CC.
 8638 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
 8639 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
8640 const MachineInstr& mi = *miI;
8641 if (mi.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
8642 return false;
8643 if (mi.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
8644 break; // Should have kill-flag - update below.
8645 }
8646
8647 // If we hit the end of the block, check whether CC is live into a
8648 // successor.
8649 if (miI == MBB->end()) {
8650 for (const MachineBasicBlock *Succ : MBB->successors())
8651 if (Succ->isLiveIn(SystemZ::CC))
8652 return false;
8653 }
8654
8655 return true;
8656}
8657
8658// Return true if it is OK for this Select pseudo-opcode to be cascaded
8659// together with other Select pseudo-opcodes into a single basic-block with
8660// a conditional jump around it.
8661static bool isSelectPseudo(MachineInstr &MI) {
 8662 switch (MI.getOpcode()) {
8663 case SystemZ::Select32:
8664 case SystemZ::Select64:
8665 case SystemZ::Select128:
8666 case SystemZ::SelectF32:
8667 case SystemZ::SelectF64:
8668 case SystemZ::SelectF128:
8669 case SystemZ::SelectVR32:
8670 case SystemZ::SelectVR64:
8671 case SystemZ::SelectVR128:
8672 return true;
8673
8674 default:
8675 return false;
8676 }
8677}
8678
8679// Helper function, which inserts PHI functions into SinkMBB:
8680// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
8681// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
8682static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
 8683 MachineBasicBlock *TrueMBB,
8684 MachineBasicBlock *FalseMBB,
8685 MachineBasicBlock *SinkMBB) {
8686 MachineFunction *MF = TrueMBB->getParent();
 8687 const SystemZInstrInfo *TII = MF->getSubtarget<SystemZSubtarget>().getInstrInfo();
 8688
8689 MachineInstr *FirstMI = Selects.front();
8690 unsigned CCValid = FirstMI->getOperand(3).getImm();
8691 unsigned CCMask = FirstMI->getOperand(4).getImm();
8692
8693 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
8694
8695 // As we are creating the PHIs, we have to be careful if there is more than
8696 // one. Later Selects may reference the results of earlier Selects, but later
8697 // PHIs have to reference the individual true/false inputs from earlier PHIs.
8698 // That also means that PHI construction must work forward from earlier to
 8699 // later, and that the code must maintain a mapping from each earlier PHI's
 8700 // destination register to the registers that went into that PHI.
 8701 DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
 8702
8703 for (auto *MI : Selects) {
8704 Register DestReg = MI->getOperand(0).getReg();
8705 Register TrueReg = MI->getOperand(1).getReg();
8706 Register FalseReg = MI->getOperand(2).getReg();
8707
8708 // If this Select we are generating is the opposite condition from
8709 // the jump we generated, then we have to swap the operands for the
8710 // PHI that is going to be generated.
8711 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
8712 std::swap(TrueReg, FalseReg);
8713
8714 if (auto It = RegRewriteTable.find(TrueReg); It != RegRewriteTable.end())
8715 TrueReg = It->second.first;
8716
8717 if (auto It = RegRewriteTable.find(FalseReg); It != RegRewriteTable.end())
8718 FalseReg = It->second.second;
8719
8720 DebugLoc DL = MI->getDebugLoc();
8721 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
8722 .addReg(TrueReg).addMBB(TrueMBB)
8723 .addReg(FalseReg).addMBB(FalseMBB);
8724
8725 // Add this PHI to the rewrite table.
8726 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
8727 }
8728
 8729 MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
 8730}
8731
8732MachineBasicBlock *
8733SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
8734 MachineBasicBlock *BB) const {
8735 MachineFunction &MF = *BB->getParent();
8736 MachineFrameInfo &MFI = MF.getFrameInfo();
8737 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
8738 assert(TFL->hasReservedCallFrame(MF) &&
8739 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
8740 (void)TFL;
8741 // Get the MaxCallFrameSize value and erase MI since it serves no further
8742 // purpose as the call frame is statically reserved in the prolog. Set
8743 // AdjustsStack as MI is *not* mapped as a frame instruction.
8744 uint32_t NumBytes = MI.getOperand(0).getImm();
8745 if (NumBytes > MFI.getMaxCallFrameSize())
8746 MFI.setMaxCallFrameSize(NumBytes);
8747 MFI.setAdjustsStack(true);
8748
8749 MI.eraseFromParent();
8750 return BB;
8751}
8752
8753// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
8754MachineBasicBlock *
8755SystemZTargetLowering::emitSelect(MachineInstr &MI,
8756 MachineBasicBlock *MBB) const {
8757 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
8758 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8759
8760 unsigned CCValid = MI.getOperand(3).getImm();
8761 unsigned CCMask = MI.getOperand(4).getImm();
8762
8763 // If we have a sequence of Select* pseudo instructions using the
8764 // same condition code value, we want to expand all of them into
8765 // a single pair of basic blocks using the same condition.
 8766 SmallVector<MachineInstr*, 8> Selects;
 8767 SmallVector<MachineInstr*, 8> DbgValues;
 8768 Selects.push_back(&MI);
8769 unsigned Count = 0;
8770 for (MachineInstr &NextMI : llvm::make_range(
8771 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
8772 if (isSelectPseudo(NextMI)) {
8773 assert(NextMI.getOperand(3).getImm() == CCValid &&
8774 "Bad CCValid operands since CC was not redefined.");
8775 if (NextMI.getOperand(4).getImm() == CCMask ||
8776 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
8777 Selects.push_back(&NextMI);
8778 continue;
8779 }
8780 break;
8781 }
8782 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
8783 NextMI.usesCustomInsertionHook())
8784 break;
8785 bool User = false;
8786 for (auto *SelMI : Selects)
8787 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
8788 User = true;
8789 break;
8790 }
8791 if (NextMI.isDebugInstr()) {
8792 if (User) {
8793 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
8794 DbgValues.push_back(&NextMI);
8795 }
8796 } else if (User || ++Count > 20)
8797 break;
8798 }
8799
8800 MachineInstr *LastMI = Selects.back();
8801 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
8802 checkCCKill(*LastMI, MBB));
8803 MachineBasicBlock *StartMBB = MBB;
 8804 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
 8805 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8806
8807 // Unless CC was killed in the last Select instruction, mark it as
8808 // live-in to both FalseMBB and JoinMBB.
8809 if (!CCKilled) {
8810 FalseMBB->addLiveIn(SystemZ::CC);
8811 JoinMBB->addLiveIn(SystemZ::CC);
8812 }
8813
8814 // StartMBB:
8815 // BRC CCMask, JoinMBB
8816 // # fallthrough to FalseMBB
8817 MBB = StartMBB;
8818 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8819 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8820 MBB->addSuccessor(JoinMBB);
8821 MBB->addSuccessor(FalseMBB);
8822
8823 // FalseMBB:
8824 // # fallthrough to JoinMBB
8825 MBB = FalseMBB;
8826 MBB->addSuccessor(JoinMBB);
8827
8828 // JoinMBB:
8829 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
8830 // ...
8831 MBB = JoinMBB;
8832 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
8833 for (auto *SelMI : Selects)
8834 SelMI->eraseFromParent();
8835
 8836 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
 8837 for (auto *DbgMI : DbgValues)
8838 MBB->splice(InsertPos, StartMBB, DbgMI);
8839
8840 return JoinMBB;
8841}
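// For instance, two back-to-back Select32 pseudos testing the same CC value
// (or its inverse, CCValid ^ CCMask) share a single BRC/fallthrough diamond:
// both are collected into Selects above and become PHIs in JoinMBB, with the
// inverted one getting its true/false PHI operands swapped by
// createPHIsForSelects.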
8842
8843// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
8844// StoreOpcode is the store to use and Invert says whether the store should
8845// happen when the condition is false rather than true. If a STORE ON
8846// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
8847MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
 8848 MachineBasicBlock *MBB,
 8849 unsigned StoreOpcode,
8850 unsigned STOCOpcode,
8851 bool Invert) const {
8852 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8853
8854 Register SrcReg = MI.getOperand(0).getReg();
8855 MachineOperand Base = MI.getOperand(1);
8856 int64_t Disp = MI.getOperand(2).getImm();
8857 Register IndexReg = MI.getOperand(3).getReg();
8858 unsigned CCValid = MI.getOperand(4).getImm();
8859 unsigned CCMask = MI.getOperand(5).getImm();
8860 DebugLoc DL = MI.getDebugLoc();
8861
8862 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
8863
8864 // ISel pattern matching also adds a load memory operand of the same
8865 // address, so take special care to find the storing memory operand.
8866 MachineMemOperand *MMO = nullptr;
8867 for (auto *I : MI.memoperands())
8868 if (I->isStore()) {
8869 MMO = I;
8870 break;
8871 }
8872
8873 // Use STOCOpcode if possible. We could use different store patterns in
8874 // order to avoid matching the index register, but the performance trade-offs
8875 // might be more complicated in that case.
8876 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
8877 if (Invert)
8878 CCMask ^= CCValid;
8879
8880 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
8881 .addReg(SrcReg)
8882 .add(Base)
8883 .addImm(Disp)
8884 .addImm(CCValid)
8885 .addImm(CCMask)
8886 .addMemOperand(MMO);
8887
8888 MI.eraseFromParent();
8889 return MBB;
8890 }
8891
8892 // Get the condition needed to branch around the store.
8893 if (!Invert)
8894 CCMask ^= CCValid;
8895
8896 MachineBasicBlock *StartMBB = MBB;
 8897 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
 8898 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8899
8900 // Unless CC was killed in the CondStore instruction, mark it as
8901 // live-in to both FalseMBB and JoinMBB.
8902 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
8903 !checkCCKill(MI, JoinMBB)) {
8904 FalseMBB->addLiveIn(SystemZ::CC);
8905 JoinMBB->addLiveIn(SystemZ::CC);
8906 }
8907
8908 // StartMBB:
8909 // BRC CCMask, JoinMBB
8910 // # fallthrough to FalseMBB
8911 MBB = StartMBB;
8912 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8913 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8914 MBB->addSuccessor(JoinMBB);
8915 MBB->addSuccessor(FalseMBB);
8916
8917 // FalseMBB:
8918 // store %SrcReg, %Disp(%Index,%Base)
8919 // # fallthrough to JoinMBB
8920 MBB = FalseMBB;
8921 BuildMI(MBB, DL, TII->get(StoreOpcode))
8922 .addReg(SrcReg)
8923 .add(Base)
8924 .addImm(Disp)
8925 .addReg(IndexReg)
8926 .addMemOperand(MMO);
8927 MBB->addSuccessor(JoinMBB);
8928
8929 MI.eraseFromParent();
8930 return JoinMBB;
8931}
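// On targets with the load/store-on-condition facility, a CondStore32 with no
// index register therefore collapses into a single STOC; otherwise the store
// is branched around with a BRC on the inverted mask (CCMask ^ CCValid), as
// laid out in the StartMBB/FalseMBB/JoinMBB blocks above.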
8932
8933// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
8934MachineBasicBlock *
8935SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
 8936 MachineBasicBlock *MBB,
 8937 bool Unsigned) const {
8938 MachineFunction &MF = *MBB->getParent();
8939 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
 8940 MachineRegisterInfo &MRI = MF.getRegInfo();
 8941
8942 // Synthetic instruction to compare 128-bit values.
8943 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
8944 Register Op0 = MI.getOperand(0).getReg();
8945 Register Op1 = MI.getOperand(1).getReg();
8946
8947 MachineBasicBlock *StartMBB = MBB;
8949 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
8950
8951 // StartMBB:
8952 //
8953 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
8954 // Swap the inputs to get:
8955 // CC 1 if high(Op0) > high(Op1)
8956 // CC 2 if high(Op0) < high(Op1)
8957 // CC 0 if high(Op0) == high(Op1)
8958 //
 8959 // If CC != 0, we're done, so jump over the next instruction.
8960 //
8961 // VEC[L]G Op1, Op0
8962 // JNE JoinMBB
8963 // # fallthrough to HiEqMBB
8964 MBB = StartMBB;
8965 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
8966 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
8967 .addReg(Op1).addReg(Op0);
8968 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8970 MBB->addSuccessor(JoinMBB);
8971 MBB->addSuccessor(HiEqMBB);
8972
8973 // HiEqMBB:
8974 //
8975 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
8976 // Since we already know the high parts are equal, the CC
8977 // result will only depend on the low parts:
8978 // CC 1 if low(Op0) > low(Op1)
8979 // CC 3 if low(Op0) <= low(Op1)
8980 //
8981 // VCHLGS Tmp, Op0, Op1
8982 // # fallthrough to JoinMBB
8983 MBB = HiEqMBB;
8984 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
8985 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
8986 .addReg(Op0).addReg(Op1);
8987 MBB->addSuccessor(JoinMBB);
8988
8989 // Mark CC as live-in to JoinMBB.
8990 JoinMBB->addLiveIn(SystemZ::CC);
8991
8992 MI.eraseFromParent();
8993 return JoinMBB;
8994}
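// Net effect: on both paths into JoinMBB the resulting condition code is 1
// exactly when Op0 > Op1 (using a signed high-part compare for SCmp128Hi and
// an unsigned one for UCmp128Hi), which is all that the users of these
// pseudos rely on.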
8995
8996// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
8997// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
8998// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
8999// whether the field should be inverted after performing BinOpcode (e.g. for
9000// NAND).
9001MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
9002 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
9003 bool Invert) const {
9004 MachineFunction &MF = *MBB->getParent();
9005 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
 9006 MachineRegisterInfo &MRI = MF.getRegInfo();
 9007
9008 // Extract the operands. Base can be a register or a frame index.
9009 // Src2 can be a register or immediate.
9010 Register Dest = MI.getOperand(0).getReg();
9011 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
9012 int64_t Disp = MI.getOperand(2).getImm();
9013 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
9014 Register BitShift = MI.getOperand(4).getReg();
9015 Register NegBitShift = MI.getOperand(5).getReg();
9016 unsigned BitSize = MI.getOperand(6).getImm();
9017 DebugLoc DL = MI.getDebugLoc();
9018
9019 // Get the right opcodes for the displacement.
9020 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
9021 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
9022 assert(LOpcode && CSOpcode && "Displacement out of range");
9023
9024 // Create virtual registers for temporary results.
9025 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9026 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9027 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9028 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9029 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9030
9031 // Insert a basic block for the main loop.
9032 MachineBasicBlock *StartMBB = MBB;
 9033 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
 9034 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9035
9036 // StartMBB:
9037 // ...
9038 // %OrigVal = L Disp(%Base)
9039 // # fall through to LoopMBB
9040 MBB = StartMBB;
9041 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
9042 MBB->addSuccessor(LoopMBB);
9043
9044 // LoopMBB:
9045 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
9046 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
9047 // %RotatedNewVal = OP %RotatedOldVal, %Src2
9048 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
9049 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
9050 // JNE LoopMBB
9051 // # fall through to DoneMBB
9052 MBB = LoopMBB;
9053 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
9054 .addReg(OrigVal).addMBB(StartMBB)
9055 .addReg(Dest).addMBB(LoopMBB);
9056 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
9057 .addReg(OldVal).addReg(BitShift).addImm(0);
9058 if (Invert) {
9059 // Perform the operation normally and then invert every bit of the field.
9060 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9061 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
9062 // XILF with the upper BitSize bits set.
9063 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
9064 .addReg(Tmp).addImm(-1U << (32 - BitSize));
9065 } else if (BinOpcode)
 9066 // A simple binary operation.
9067 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
9068 .addReg(RotatedOldVal)
9069 .add(Src2);
9070 else
9071 // Use RISBG to rotate Src2 into position and use it to replace the
9072 // field in RotatedOldVal.
9073 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
9074 .addReg(RotatedOldVal).addReg(Src2.getReg())
9075 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
9076 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
9077 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
9078 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
9079 .addReg(OldVal)
9080 .addReg(NewVal)
9081 .add(Base)
9082 .addImm(Disp);
9083 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9085 MBB->addSuccessor(LoopMBB);
9086 MBB->addSuccessor(DoneMBB);
9087
9088 MI.eraseFromParent();
9089 return DoneMBB;
9090}
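// Example of the Invert (NAND-style) path: for a byte-sized operation,
// BitSize is 8 and the XILF immediate is -1U << 24 = 0xFF000000, so only the
// rotated field in the top byte is complemented while the neighbouring bytes
// are left untouched for the CS to preserve.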
9091
9092// Implement EmitInstrWithCustomInserter for subword pseudo
9093// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
9094// instruction that should be used to compare the current field with the
9095// minimum or maximum value. KeepOldMask is the BRC condition-code mask
9096// for when the current field should be kept.
9097MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
9098 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
9099 unsigned KeepOldMask) const {
9100 MachineFunction &MF = *MBB->getParent();
9101 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
 9102 MachineRegisterInfo &MRI = MF.getRegInfo();
 9103
9104 // Extract the operands. Base can be a register or a frame index.
9105 Register Dest = MI.getOperand(0).getReg();
9106 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
9107 int64_t Disp = MI.getOperand(2).getImm();
9108 Register Src2 = MI.getOperand(3).getReg();
9109 Register BitShift = MI.getOperand(4).getReg();
9110 Register NegBitShift = MI.getOperand(5).getReg();
9111 unsigned BitSize = MI.getOperand(6).getImm();
9112 DebugLoc DL = MI.getDebugLoc();
9113
9114 // Get the right opcodes for the displacement.
9115 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
9116 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
9117 assert(LOpcode && CSOpcode && "Displacement out of range");
9118
9119 // Create virtual registers for temporary results.
9120 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9121 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9122 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9123 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9124 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9125 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9126
9127 // Insert 3 basic blocks for the loop.
9128 MachineBasicBlock *StartMBB = MBB;
 9129 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
 9130 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9131 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
9132 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
9133
9134 // StartMBB:
9135 // ...
9136 // %OrigVal = L Disp(%Base)
9137 // # fall through to LoopMBB
9138 MBB = StartMBB;
9139 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
9140 MBB->addSuccessor(LoopMBB);
9141
9142 // LoopMBB:
9143 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
9144 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
9145 // CompareOpcode %RotatedOldVal, %Src2
9146 // BRC KeepOldMask, UpdateMBB
9147 MBB = LoopMBB;
9148 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
9149 .addReg(OrigVal).addMBB(StartMBB)
9150 .addReg(Dest).addMBB(UpdateMBB);
9151 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
9152 .addReg(OldVal).addReg(BitShift).addImm(0);
9153 BuildMI(MBB, DL, TII->get(CompareOpcode))
9154 .addReg(RotatedOldVal).addReg(Src2);
9155 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9156 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
9157 MBB->addSuccessor(UpdateMBB);
9158 MBB->addSuccessor(UseAltMBB);
9159
9160 // UseAltMBB:
9161 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
9162 // # fall through to UpdateMBB
9163 MBB = UseAltMBB;
9164 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
9165 .addReg(RotatedOldVal).addReg(Src2)
9166 .addImm(32).addImm(31 + BitSize).addImm(0);
9167 MBB->addSuccessor(UpdateMBB);
9168
9169 // UpdateMBB:
9170 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
9171 // [ %RotatedAltVal, UseAltMBB ]
9172 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
9173 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
9174 // JNE LoopMBB
9175 // # fall through to DoneMBB
9176 MBB = UpdateMBB;
9177 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
9178 .addReg(RotatedOldVal).addMBB(LoopMBB)
9179 .addReg(RotatedAltVal).addMBB(UseAltMBB);
9180 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
9181 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
9182 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
9183 .addReg(OldVal)
9184 .addReg(NewVal)
9185 .add(Base)
9186 .addImm(Disp);
9187 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9189 MBB->addSuccessor(LoopMBB);
9190 MBB->addSuccessor(DoneMBB);
9191
9192 MI.eraseFromParent();
9193 return DoneMBB;
9194}
9195
9196// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
9197// instruction MI.
9198MachineBasicBlock *
9199SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
9200 MachineBasicBlock *MBB) const {
9201 MachineFunction &MF = *MBB->getParent();
9202 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
 9203 MachineRegisterInfo &MRI = MF.getRegInfo();
 9204
9205 // Extract the operands. Base can be a register or a frame index.
9206 Register Dest = MI.getOperand(0).getReg();
9207 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
9208 int64_t Disp = MI.getOperand(2).getImm();
9209 Register CmpVal = MI.getOperand(3).getReg();
9210 Register OrigSwapVal = MI.getOperand(4).getReg();
9211 Register BitShift = MI.getOperand(5).getReg();
9212 Register NegBitShift = MI.getOperand(6).getReg();
9213 int64_t BitSize = MI.getOperand(7).getImm();
9214 DebugLoc DL = MI.getDebugLoc();
9215
9216 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
9217
9218 // Get the right opcodes for the displacement and zero-extension.
9219 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
9220 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
9221 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
9222 assert(LOpcode && CSOpcode && "Displacement out of range");
9223
9224 // Create virtual registers for temporary results.
9225 Register OrigOldVal = MRI.createVirtualRegister(RC);
9226 Register OldVal = MRI.createVirtualRegister(RC);
9227 Register SwapVal = MRI.createVirtualRegister(RC);
9228 Register StoreVal = MRI.createVirtualRegister(RC);
9229 Register OldValRot = MRI.createVirtualRegister(RC);
9230 Register RetryOldVal = MRI.createVirtualRegister(RC);
9231 Register RetrySwapVal = MRI.createVirtualRegister(RC);
9232
9233 // Insert 2 basic blocks for the loop.
9234 MachineBasicBlock *StartMBB = MBB;
 9235 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
 9236 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9237 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
9238
9239 // StartMBB:
9240 // ...
9241 // %OrigOldVal = L Disp(%Base)
9242 // # fall through to LoopMBB
9243 MBB = StartMBB;
9244 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
9245 .add(Base)
9246 .addImm(Disp)
9247 .addReg(0);
9248 MBB->addSuccessor(LoopMBB);
9249
9250 // LoopMBB:
9251 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
9252 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
9253 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
9254 // ^^ The low BitSize bits contain the field
9255 // of interest.
9256 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
9257 // ^^ Replace the upper 32-BitSize bits of the
9258 // swap value with those that we loaded and rotated.
9259 // %Dest = LL[CH] %OldValRot
9260 // CR %Dest, %CmpVal
9261 // JNE DoneMBB
9262 // # Fall through to SetMBB
9263 MBB = LoopMBB;
9264 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
9265 .addReg(OrigOldVal).addMBB(StartMBB)
9266 .addReg(RetryOldVal).addMBB(SetMBB);
9267 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
9268 .addReg(OrigSwapVal).addMBB(StartMBB)
9269 .addReg(RetrySwapVal).addMBB(SetMBB);
9270 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
9271 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
9272 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
9273 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
9274 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
9275 .addReg(OldValRot);
9276 BuildMI(MBB, DL, TII->get(SystemZ::CR))
9277 .addReg(Dest).addReg(CmpVal);
9278 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9281 MBB->addSuccessor(DoneMBB);
9282 MBB->addSuccessor(SetMBB);
9283
9284 // SetMBB:
9285 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
9286 // ^^ Rotate the new field to its proper position.
9287 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
9288 // JNE LoopMBB
9289 // # fall through to ExitMBB
9290 MBB = SetMBB;
9291 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
9292 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
9293 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
9294 .addReg(OldVal)
9295 .addReg(StoreVal)
9296 .add(Base)
9297 .addImm(Disp);
9298 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9300 MBB->addSuccessor(LoopMBB);
9301 MBB->addSuccessor(DoneMBB);
9302
9303 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
9304 // to the block after the loop. At this point, CC may have been defined
9305 // either by the CR in LoopMBB or by the CS in SetMBB.
9306 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
9307 DoneMBB->addLiveIn(SystemZ::CC);
9308
9309 MI.eraseFromParent();
9310 return DoneMBB;
9311}
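// With BitSize == 8, for example, the RLL leaves the field of interest in the
// low byte of %OldValRot, the RISBG32 copies bits 32..55 (the upper 24 bits
// of the word) from %OldValRot into %RetrySwapVal so that only the low byte
// still carries the new swap value, and LLCR zero-extends the old field from
// %OldValRot into %Dest for the CR against %CmpVal.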
9312
9313// Emit a move from two GR64s to a GR128.
9314MachineBasicBlock *
9315SystemZTargetLowering::emitPair128(MachineInstr &MI,
9316 MachineBasicBlock *MBB) const {
9317 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9318 const DebugLoc &DL = MI.getDebugLoc();
9319
9320 Register Dest = MI.getOperand(0).getReg();
9321 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
9322 .add(MI.getOperand(1))
9323 .addImm(SystemZ::subreg_h64)
9324 .add(MI.getOperand(2))
9325 .addImm(SystemZ::subreg_l64);
9326 MI.eraseFromParent();
9327 return MBB;
9328}
9329
9330// Emit an extension from a GR64 to a GR128. ClearEven is true
9331// if the high register of the GR128 value must be cleared or false if
9332// it's "don't care".
9333MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
 9334 MachineBasicBlock *MBB,
 9335 bool ClearEven) const {
9336 MachineFunction &MF = *MBB->getParent();
9337 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
 9338 MachineRegisterInfo &MRI = MF.getRegInfo();
 9339 DebugLoc DL = MI.getDebugLoc();
9340
9341 Register Dest = MI.getOperand(0).getReg();
9342 Register Src = MI.getOperand(1).getReg();
9343 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
9344
9345 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
9346 if (ClearEven) {
9347 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
9348 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
9349
9350 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
9351 .addImm(0);
9352 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
9353 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
9354 In128 = NewIn128;
9355 }
9356 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
9357 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
9358
9359 MI.eraseFromParent();
9360 return MBB;
9361}
9362
9363MachineBasicBlock *
9364SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
 9365 MachineBasicBlock *MBB,
 9366 unsigned Opcode, bool IsMemset) const {
9367 MachineFunction &MF = *MBB->getParent();
9368 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
 9369 MachineRegisterInfo &MRI = MF.getRegInfo();
 9370 DebugLoc DL = MI.getDebugLoc();
9371
9372 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
9373 uint64_t DestDisp = MI.getOperand(1).getImm();
9374 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
9375 uint64_t SrcDisp;
9376
9377 // Fold the displacement Disp if it is out of range.
9378 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
9379 if (!isUInt<12>(Disp)) {
9380 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9381 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
9382 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
9383 .add(Base).addImm(Disp).addReg(0);
9384 Base = MachineOperand::CreateReg(Reg, false);
9385 Disp = 0;
9386 }
9387 };
9388
9389 if (!IsMemset) {
9390 SrcBase = earlyUseOperand(MI.getOperand(2));
9391 SrcDisp = MI.getOperand(3).getImm();
9392 } else {
9393 SrcBase = DestBase;
9394 SrcDisp = DestDisp++;
9395 foldDisplIfNeeded(DestBase, DestDisp);
9396 }
9397
9398 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
9399 bool IsImmForm = LengthMO.isImm();
9400 bool IsRegForm = !IsImmForm;
9401
9402 // Build and insert one Opcode of Length, with special treatment for memset.
9403 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
9405 MachineOperand DBase, uint64_t DDisp,
9407 unsigned Length) -> void {
9408 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
9409 if (IsMemset) {
9410 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
9411 if (ByteMO.isImm())
9412 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
9413 .add(SBase).addImm(SDisp).add(ByteMO);
9414 else
9415 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
9416 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
9417 if (--Length == 0)
9418 return;
9419 }
9420 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
9421 .add(DBase).addImm(DDisp).addImm(Length)
9422 .add(SBase).addImm(SDisp)
9423 .setMemRefs(MI.memoperands());
9424 };
9425
9426 bool NeedsLoop = false;
9427 uint64_t ImmLength = 0;
9428 Register LenAdjReg = SystemZ::NoRegister;
9429 if (IsImmForm) {
9430 ImmLength = LengthMO.getImm();
9431 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
9432 if (ImmLength == 0) {
9433 MI.eraseFromParent();
9434 return MBB;
9435 }
9436 if (Opcode == SystemZ::CLC) {
9437 if (ImmLength > 3 * 256)
9438 // A two-CLC sequence is a clear win over a loop, not least because
9439 // it needs only one branch. A three-CLC sequence needs the same
9440 // number of branches as a loop (i.e. 2), but is shorter. That
9441 // brings us to lengths greater than 768 bytes. It seems relatively
9442 // likely that a difference will be found within the first 768 bytes,
9443 // so we just optimize for the smallest number of branch
9444 // instructions, in order to avoid polluting the prediction buffer
9445 // too much.
9446 NeedsLoop = true;
9447 } else if (ImmLength > 6 * 256)
9448 // The heuristic we use is to prefer loops for anything that would
9449 // require 7 or more MVCs. With these kinds of sizes there isn't much
9450 // to choose between straight-line code and looping code, since the
9451 // time will be dominated by the MVCs themselves.
9452 NeedsLoop = true;
9453 } else {
9454 NeedsLoop = true;
9455 LenAdjReg = LengthMO.getReg();
9456 }
9457
9458 // When generating more than one CLC, all but the last will need to
9459 // branch to the end when a difference is found.
9460 MachineBasicBlock *EndMBB =
9461 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
 9462 ? SystemZ::splitBlockAfter(MI, MBB)
 9463 : nullptr);
9464
9465 if (NeedsLoop) {
9466 Register StartCountReg =
9467 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
9468 if (IsImmForm) {
9469 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
9470 ImmLength &= 255;
9471 } else {
9472 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
9473 .addReg(LenAdjReg)
9474 .addReg(0)
9475 .addImm(8);
9476 }
9477
9478 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
9479 auto loadZeroAddress = [&]() -> MachineOperand {
9480 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9481 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
9482 return MachineOperand::CreateReg(Reg, false);
9483 };
9484 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
9485 DestBase = loadZeroAddress();
9486 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
9487 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
9488
9489 MachineBasicBlock *StartMBB = nullptr;
9490 MachineBasicBlock *LoopMBB = nullptr;
9491 MachineBasicBlock *NextMBB = nullptr;
9492 MachineBasicBlock *DoneMBB = nullptr;
9493 MachineBasicBlock *AllDoneMBB = nullptr;
9494
9495 Register StartSrcReg = forceReg(MI, SrcBase, TII);
9496 Register StartDestReg =
9497 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
9498
9499 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
9500 Register ThisSrcReg = MRI.createVirtualRegister(RC);
9501 Register ThisDestReg =
9502 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
9503 Register NextSrcReg = MRI.createVirtualRegister(RC);
9504 Register NextDestReg =
9505 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
9506 RC = &SystemZ::GR64BitRegClass;
9507 Register ThisCountReg = MRI.createVirtualRegister(RC);
9508 Register NextCountReg = MRI.createVirtualRegister(RC);
9509
9510 if (IsRegForm) {
9511 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9512 StartMBB = SystemZ::emitBlockAfter(MBB);
9513 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9514 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9515 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
9516
9517 // MBB:
 9518 // # Jump to AllDoneMBB if LenAdjReg encodes a length of 0, or fall thru to StartMBB.
9519 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9520 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
9521 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9523 .addMBB(AllDoneMBB);
9524 MBB->addSuccessor(AllDoneMBB);
9525 if (!IsMemset)
9526 MBB->addSuccessor(StartMBB);
9527 else {
9528 // MemsetOneCheckMBB:
9529 // # Jump to MemsetOneMBB for a memset of length 1, or
9530 // # fall thru to StartMBB.
9531 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
9532 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
9533 MBB->addSuccessor(MemsetOneCheckMBB);
9534 MBB = MemsetOneCheckMBB;
9535 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9536 .addReg(LenAdjReg).addImm(-1);
9537 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9539 .addMBB(MemsetOneMBB);
9540 MBB->addSuccessor(MemsetOneMBB, {10, 100});
9541 MBB->addSuccessor(StartMBB, {90, 100});
9542
9543 // MemsetOneMBB:
9544 // # Jump back to AllDoneMBB after a single MVI or STC.
9545 MBB = MemsetOneMBB;
9546 insertMemMemOp(MBB, MBB->end(),
9547 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
9548 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
9549 1);
9550 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
9551 MBB->addSuccessor(AllDoneMBB);
9552 }
9553
9554 // StartMBB:
9555 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
9556 MBB = StartMBB;
9557 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9558 .addReg(StartCountReg).addImm(0);
9559 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9561 .addMBB(DoneMBB);
9562 MBB->addSuccessor(DoneMBB);
9563 MBB->addSuccessor(LoopMBB);
9564 }
9565 else {
9566 StartMBB = MBB;
9567 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9568 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9569 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9570
9571 // StartMBB:
9572 // # fall through to LoopMBB
9573 MBB->addSuccessor(LoopMBB);
9574
9575 DestBase = MachineOperand::CreateReg(NextDestReg, false);
9576 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
9577 if (EndMBB && !ImmLength)
9578 // If the loop handled the whole CLC range, DoneMBB will be empty with
9579 // CC live-through into EndMBB, so add it as live-in.
9580 DoneMBB->addLiveIn(SystemZ::CC);
9581 }
9582
9583 // LoopMBB:
9584 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
9585 // [ %NextDestReg, NextMBB ]
9586 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
9587 // [ %NextSrcReg, NextMBB ]
9588 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
9589 // [ %NextCountReg, NextMBB ]
9590 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
9591 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
9592 // ( JLH EndMBB )
9593 //
9594 // The prefetch is used only for MVC. The JLH is used only for CLC.
9595 MBB = LoopMBB;
9596 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
9597 .addReg(StartDestReg).addMBB(StartMBB)
9598 .addReg(NextDestReg).addMBB(NextMBB);
9599 if (!HaveSingleBase)
9600 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
9601 .addReg(StartSrcReg).addMBB(StartMBB)
9602 .addReg(NextSrcReg).addMBB(NextMBB);
9603 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
9604 .addReg(StartCountReg).addMBB(StartMBB)
9605 .addReg(NextCountReg).addMBB(NextMBB);
9606 if (Opcode == SystemZ::MVC)
9607 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
9609 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
9610 insertMemMemOp(MBB, MBB->end(),
9611 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
9612 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
9613 if (EndMBB) {
9614 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9616 .addMBB(EndMBB);
9617 MBB->addSuccessor(EndMBB);
9618 MBB->addSuccessor(NextMBB);
9619 }
9620
9621 // NextMBB:
9622 // %NextDestReg = LA 256(%ThisDestReg)
9623 // %NextSrcReg = LA 256(%ThisSrcReg)
9624 // %NextCountReg = AGHI %ThisCountReg, -1
9625 // CGHI %NextCountReg, 0
9626 // JLH LoopMBB
9627 // # fall through to DoneMBB
9628 //
9629 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
9630 MBB = NextMBB;
9631 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
9632 .addReg(ThisDestReg).addImm(256).addReg(0);
9633 if (!HaveSingleBase)
9634 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
9635 .addReg(ThisSrcReg).addImm(256).addReg(0);
9636 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
9637 .addReg(ThisCountReg).addImm(-1);
9638 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9639 .addReg(NextCountReg).addImm(0);
9640 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9642 .addMBB(LoopMBB);
9643 MBB->addSuccessor(LoopMBB);
9644 MBB->addSuccessor(DoneMBB);
9645
9646 MBB = DoneMBB;
9647 if (IsRegForm) {
9648 // DoneMBB:
9649 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
9650 // # Use EXecute Relative Long for the remainder of the bytes. The target
9651 // instruction of the EXRL will have a length field of 1 since 0 is an
9652 // illegal value. The number of bytes processed becomes (%LenAdjReg &
9653 // 0xff) + 1.
9654 // # Fall through to AllDoneMBB.
9655 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9656 Register RemDestReg = HaveSingleBase ? RemSrcReg
9657 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9658 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
9659 .addReg(StartDestReg).addMBB(StartMBB)
9660 .addReg(NextDestReg).addMBB(NextMBB);
9661 if (!HaveSingleBase)
9662 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
9663 .addReg(StartSrcReg).addMBB(StartMBB)
9664 .addReg(NextSrcReg).addMBB(NextMBB);
9665 if (IsMemset)
9666 insertMemMemOp(MBB, MBB->end(),
9667 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
9668 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
9669 MachineInstrBuilder EXRL_MIB =
9670 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
9671 .addImm(Opcode)
9672 .addReg(LenAdjReg)
9673 .addReg(RemDestReg).addImm(DestDisp)
9674 .addReg(RemSrcReg).addImm(SrcDisp);
9675 MBB->addSuccessor(AllDoneMBB);
9676 MBB = AllDoneMBB;
9677 if (Opcode != SystemZ::MVC) {
9678 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
9679 if (EndMBB)
9680 MBB->addLiveIn(SystemZ::CC);
9681 }
9682 }
9684 }
9685
9686 // Handle any remaining bytes with straight-line code.
9687 while (ImmLength > 0) {
9688 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
9689 // The previous iteration might have created out-of-range displacements.
9690 // Apply them using LA/LAY if so.
9691 foldDisplIfNeeded(DestBase, DestDisp);
9692 foldDisplIfNeeded(SrcBase, SrcDisp);
9693 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
9694 DestDisp += ThisLength;
9695 SrcDisp += ThisLength;
9696 ImmLength -= ThisLength;
9697 // If there's another CLC to go, branch to the end if a difference
9698 // was found.
9699 if (EndMBB && ImmLength > 0) {
 9700 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
 9701 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9703 .addMBB(EndMBB);
9704 MBB->addSuccessor(EndMBB);
9705 MBB->addSuccessor(NextMBB);
9706 MBB = NextMBB;
9707 }
9708 }
9709 if (EndMBB) {
9710 MBB->addSuccessor(EndMBB);
9711 MBB = EndMBB;
9712 MBB->addLiveIn(SystemZ::CC);
9713 }
9714
9715 MI.eraseFromParent();
9716 return MBB;
9717}
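// Length handling example: an MVC-based memcpy of 700 bytes stays below the
// 6 * 256 loop threshold and is emitted straight-line as MVC 256 + 256 + 188,
// advancing both displacements by 256 each time; a CLC of the same length is
// also emitted straight-line (its loop threshold is 3 * 256), but with the
// first two CLCs branching to EndMBB as soon as a difference is found.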
9718
9719// Decompose string pseudo-instruction MI into a loop that continually performs
9720// Opcode until CC != 3.
9721MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
9722 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9723 MachineFunction &MF = *MBB->getParent();
9724 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
 9725 MachineRegisterInfo &MRI = MF.getRegInfo();
 9726 DebugLoc DL = MI.getDebugLoc();
9727
9728 uint64_t End1Reg = MI.getOperand(0).getReg();
9729 uint64_t Start1Reg = MI.getOperand(1).getReg();
9730 uint64_t Start2Reg = MI.getOperand(2).getReg();
9731 uint64_t CharReg = MI.getOperand(3).getReg();
9732
9733 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
9734 uint64_t This1Reg = MRI.createVirtualRegister(RC);
9735 uint64_t This2Reg = MRI.createVirtualRegister(RC);
9736 uint64_t End2Reg = MRI.createVirtualRegister(RC);
9737
9738 MachineBasicBlock *StartMBB = MBB;
 9739 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
 9740 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9741
9742 // StartMBB:
9743 // # fall through to LoopMBB
9744 MBB->addSuccessor(LoopMBB);
9745
9746 // LoopMBB:
9747 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
9748 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
9749 // R0L = %CharReg
9750 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
9751 // JO LoopMBB
9752 // # fall through to DoneMBB
9753 //
9754 // The load of R0L can be hoisted by post-RA LICM.
9755 MBB = LoopMBB;
9756
9757 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
9758 .addReg(Start1Reg).addMBB(StartMBB)
9759 .addReg(End1Reg).addMBB(LoopMBB);
9760 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
9761 .addReg(Start2Reg).addMBB(StartMBB)
9762 .addReg(End2Reg).addMBB(LoopMBB);
9763 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
9764 BuildMI(MBB, DL, TII->get(Opcode))
9765 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
9766 .addReg(This1Reg).addReg(This2Reg);
9767 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9769 MBB->addSuccessor(LoopMBB);
9770 MBB->addSuccessor(DoneMBB);
9771
9772 DoneMBB->addLiveIn(SystemZ::CC);
9773
9774 MI.eraseFromParent();
9775 return DoneMBB;
9776}
9777
9778// Update TBEGIN instruction with final opcode and register clobbers.
9779MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
9780 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
9781 bool NoFloat) const {
9782 MachineFunction &MF = *MBB->getParent();
9783 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9784 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9785
9786 // Update opcode.
9787 MI.setDesc(TII->get(Opcode));
9788
9789 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
9790 // Make sure to add the corresponding GRSM bits if they are missing.
9791 uint64_t Control = MI.getOperand(2).getImm();
9792 static const unsigned GPRControlBit[16] = {
9793 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
9794 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
9795 };
9796 Control |= GPRControlBit[15];
9797 if (TFI->hasFP(MF))
9798 Control |= GPRControlBit[11];
9799 MI.getOperand(2).setImm(Control);
9800
9801 // Add GPR clobbers.
9802 for (int I = 0; I < 16; I++) {
9803 if ((Control & GPRControlBit[I]) == 0) {
9804 unsigned Reg = SystemZMC::GR64Regs[I];
9805 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9806 }
9807 }
9808
9809 // Add FPR/VR clobbers.
9810 if (!NoFloat && (Control & 4) != 0) {
9811 if (Subtarget.hasVector()) {
9812 for (unsigned Reg : SystemZMC::VR128Regs) {
9813 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9814 }
9815 } else {
9816 for (unsigned Reg : SystemZMC::FP64Regs) {
9817 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9818 }
9819 }
9820 }
9821
9822 return MBB;
9823}
9824
9825MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
9826 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9827 MachineFunction &MF = *MBB->getParent();
 9828 MachineRegisterInfo *MRI = &MF.getRegInfo();
 9829 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9830 DebugLoc DL = MI.getDebugLoc();
9831
9832 Register SrcReg = MI.getOperand(0).getReg();
9833
9834 // Create new virtual register of the same class as source.
9835 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
9836 Register DstReg = MRI->createVirtualRegister(RC);
9837
9838 // Replace pseudo with a normal load-and-test that models the def as
9839 // well.
9840 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
9841 .addReg(SrcReg)
9842 .setMIFlags(MI.getFlags());
9843 MI.eraseFromParent();
9844
9845 return MBB;
9846}
9847
9848MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
 9849 MachineInstr &MI, MachineBasicBlock *MBB) const {
 9850 MachineFunction &MF = *MBB->getParent();
 9851 MachineRegisterInfo *MRI = &MF.getRegInfo();
 9852 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9853 DebugLoc DL = MI.getDebugLoc();
9854 const unsigned ProbeSize = getStackProbeSize(MF);
9855 Register DstReg = MI.getOperand(0).getReg();
9856 Register SizeReg = MI.getOperand(2).getReg();
9857
9858 MachineBasicBlock *StartMBB = MBB;
 9859 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
 9860 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
9861 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
9862 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
9863 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
9864
9867
9868 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9869 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9870
9871 // LoopTestMBB
9872 // BRC TailTestMBB
9873 // # fallthrough to LoopBodyMBB
9874 StartMBB->addSuccessor(LoopTestMBB);
9875 MBB = LoopTestMBB;
9876 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
9877 .addReg(SizeReg)
9878 .addMBB(StartMBB)
9879 .addReg(IncReg)
9880 .addMBB(LoopBodyMBB);
9881 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
9882 .addReg(PHIReg)
9883 .addImm(ProbeSize);
9884 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9886 .addMBB(TailTestMBB);
9887 MBB->addSuccessor(LoopBodyMBB);
9888 MBB->addSuccessor(TailTestMBB);
9889
9890 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
9891 // J LoopTestMBB
9892 MBB = LoopBodyMBB;
9893 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
9894 .addReg(PHIReg)
9895 .addImm(ProbeSize);
9896 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
9897 .addReg(SystemZ::R15D)
9898 .addImm(ProbeSize);
9899 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9900 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
9901 .setMemRefs(VolLdMMO);
9902 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
9903 MBB->addSuccessor(LoopTestMBB);
9904
9905 // TailTestMBB
9906 // BRC DoneMBB
9907 // # fallthrough to TailMBB
9908 MBB = TailTestMBB;
9909 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9910 .addReg(PHIReg)
9911 .addImm(0);
9912 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9914 .addMBB(DoneMBB);
9915 MBB->addSuccessor(TailMBB);
9916 MBB->addSuccessor(DoneMBB);
9917
9918 // TailMBB
9919 // # fallthrough to DoneMBB
9920 MBB = TailMBB;
9921 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
9922 .addReg(SystemZ::R15D)
9923 .addReg(PHIReg);
9924 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9925 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
9926 .setMemRefs(VolLdMMO);
9927 MBB->addSuccessor(DoneMBB);
9928
9929 // DoneMBB
9930 MBB = DoneMBB;
9931 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
9932 .addReg(SystemZ::R15D);
9933
9934 MI.eraseFromParent();
9935 return DoneMBB;
9936}
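// Worked example: allocating 10000 bytes with the default 4096-byte probe
// size runs the loop body twice (each iteration bumps %r15 down by 4096 and
// probes it with the volatile CG), leaving 10000 - 2 * 4096 = 1808 bytes for
// the tail, which does one final SLGR/CG before the allocated address is
// copied to the result register.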
9937
9938SDValue SystemZTargetLowering::
9939getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
 9940 MachineFunction &MF = DAG.getMachineFunction();
 9941 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
9942 SDLoc DL(SP);
9943 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
9944 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
9945}
9946
9947MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
9948 MachineInstr &MI, MachineBasicBlock *MBB) const {
9949 switch (MI.getOpcode()) {
9950 case SystemZ::ADJCALLSTACKDOWN:
9951 case SystemZ::ADJCALLSTACKUP:
9952 return emitAdjCallStack(MI, MBB);
9953
9954 case SystemZ::Select32:
9955 case SystemZ::Select64:
9956 case SystemZ::Select128:
9957 case SystemZ::SelectF32:
9958 case SystemZ::SelectF64:
9959 case SystemZ::SelectF128:
9960 case SystemZ::SelectVR32:
9961 case SystemZ::SelectVR64:
9962 case SystemZ::SelectVR128:
9963 return emitSelect(MI, MBB);
9964
9965 case SystemZ::CondStore8Mux:
9966 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
9967 case SystemZ::CondStore8MuxInv:
9968 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
9969 case SystemZ::CondStore16Mux:
9970 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
9971 case SystemZ::CondStore16MuxInv:
9972 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
9973 case SystemZ::CondStore32Mux:
9974 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
9975 case SystemZ::CondStore32MuxInv:
9976 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
9977 case SystemZ::CondStore8:
9978 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
9979 case SystemZ::CondStore8Inv:
9980 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
9981 case SystemZ::CondStore16:
9982 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
9983 case SystemZ::CondStore16Inv:
9984 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
9985 case SystemZ::CondStore32:
9986 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
9987 case SystemZ::CondStore32Inv:
9988 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
9989 case SystemZ::CondStore64:
9990 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
9991 case SystemZ::CondStore64Inv:
9992 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
9993 case SystemZ::CondStoreF32:
9994 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
9995 case SystemZ::CondStoreF32Inv:
9996 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
9997 case SystemZ::CondStoreF64:
9998 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
9999 case SystemZ::CondStoreF64Inv:
10000 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
10001
10002 case SystemZ::SCmp128Hi:
10003 return emitICmp128Hi(MI, MBB, false);
10004 case SystemZ::UCmp128Hi:
10005 return emitICmp128Hi(MI, MBB, true);
10006
10007 case SystemZ::PAIR128:
10008 return emitPair128(MI, MBB);
10009 case SystemZ::AEXT128:
10010 return emitExt128(MI, MBB, false);
10011 case SystemZ::ZEXT128:
10012 return emitExt128(MI, MBB, true);
10013
10014 case SystemZ::ATOMIC_SWAPW:
10015 return emitAtomicLoadBinary(MI, MBB, 0);
10016
10017 case SystemZ::ATOMIC_LOADW_AR:
10018 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
10019 case SystemZ::ATOMIC_LOADW_AFI:
10020 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
10021
10022 case SystemZ::ATOMIC_LOADW_SR:
10023 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
10024
10025 case SystemZ::ATOMIC_LOADW_NR:
10026 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
10027 case SystemZ::ATOMIC_LOADW_NILH:
10028 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
10029
10030 case SystemZ::ATOMIC_LOADW_OR:
10031 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
10032 case SystemZ::ATOMIC_LOADW_OILH:
10033 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
10034
10035 case SystemZ::ATOMIC_LOADW_XR:
10036 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
10037 case SystemZ::ATOMIC_LOADW_XILF:
10038 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
10039
10040 case SystemZ::ATOMIC_LOADW_NRi:
10041 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
10042 case SystemZ::ATOMIC_LOADW_NILHi:
10043 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
10044
10045 case SystemZ::ATOMIC_LOADW_MIN:
10046 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
10047 case SystemZ::ATOMIC_LOADW_MAX:
10048 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
10049 case SystemZ::ATOMIC_LOADW_UMIN:
10050 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
10051 case SystemZ::ATOMIC_LOADW_UMAX:
10052 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
10053
10054 case SystemZ::ATOMIC_CMP_SWAPW:
10055 return emitAtomicCmpSwapW(MI, MBB);
10056 case SystemZ::MVCImm:
10057 case SystemZ::MVCReg:
10058 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
10059 case SystemZ::NCImm:
10060 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
10061 case SystemZ::OCImm:
10062 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
10063 case SystemZ::XCImm:
10064 case SystemZ::XCReg:
10065 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
10066 case SystemZ::CLCImm:
10067 case SystemZ::CLCReg:
10068 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
10069 case SystemZ::MemsetImmImm:
10070 case SystemZ::MemsetImmReg:
10071 case SystemZ::MemsetRegImm:
10072 case SystemZ::MemsetRegReg:
10073 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
10074 case SystemZ::CLSTLoop:
10075 return emitStringWrapper(MI, MBB, SystemZ::CLST);
10076 case SystemZ::MVSTLoop:
10077 return emitStringWrapper(MI, MBB, SystemZ::MVST);
10078 case SystemZ::SRSTLoop:
10079 return emitStringWrapper(MI, MBB, SystemZ::SRST);
10080 case SystemZ::TBEGIN:
10081 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
10082 case SystemZ::TBEGIN_nofloat:
10083 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
10084 case SystemZ::TBEGINC:
10085 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
10086 case SystemZ::LTEBRCompare_Pseudo:
10087 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
10088 case SystemZ::LTDBRCompare_Pseudo:
10089 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
10090 case SystemZ::LTXBRCompare_Pseudo:
10091 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
10092
10093 case SystemZ::PROBED_ALLOCA:
10094 return emitProbedAlloca(MI, MBB);
10095 case SystemZ::EH_SjLj_SetJmp:
10096 return emitEHSjLjSetJmp(MI, MBB);
10097 case SystemZ::EH_SjLj_LongJmp:
10098 return emitEHSjLjLongJmp(MI, MBB);
10099
10100 case TargetOpcode::STACKMAP:
10101 case TargetOpcode::PATCHPOINT:
10102 return emitPatchPoint(MI, MBB);
10103
10104 default:
10105 llvm_unreachable("Unexpected instr type to insert");
10106 }
10107}
10108
10109// This is only used by the isel schedulers, and is needed only to prevent
10110// the compiler from crashing when list-ilp is used.
10111const TargetRegisterClass *
10112SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
10113 if (VT == MVT::Untyped)
10114 return &SystemZ::ADDR128BitRegClass;
10115 return TargetLowering::getRepRegClassFor(VT);
10116}
10117
10118SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
10119 SelectionDAG &DAG) const {
10120 SDLoc dl(Op);
10121 /*
10122 The rounding method is in FPC Byte 3 bits 6-7, and has the following
10123 settings:
10124 00 Round to nearest
10125 01 Round to 0
10126 10 Round to +inf
10127 11 Round to -inf
10128
10129 FLT_ROUNDS, on the other hand, expects the following:
10130 -1 Undefined
10131 0 Round to 0
10132 1 Round to nearest
10133 2 Round to +inf
10134 3 Round to -inf
10135 */
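  // For clarity, with CWD1 = FPC & 3 the transform below,
  // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1, maps the two encodings as:
  //   FPC 0 (nearest) -> (0 ^ 0) ^ 1 = 1
  //   FPC 1 (to 0)    -> (1 ^ 0) ^ 1 = 0
  //   FPC 2 (+inf)    -> (2 ^ 1) ^ 1 = 2
  //   FPC 3 (-inf)    -> (3 ^ 1) ^ 1 = 3
  // which is exactly the FLT_ROUNDS encoding listed above.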
10136
10137 // Save FPC to register.
10138 SDValue Chain = Op.getOperand(0);
10139 SDValue EFPC(
10140 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
10141 Chain = EFPC.getValue(1);
10142
10143 // Transform as necessary
10144 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
10145 DAG.getConstant(3, dl, MVT::i32));
10146 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
10147 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
10148 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
10149 DAG.getConstant(1, dl, MVT::i32)));
10150
10151 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
10152 DAG.getConstant(1, dl, MVT::i32));
10153 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
10154
10155 return DAG.getMergeValues({RetVal, Chain}, dl);
10156}
10157
10158SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
10159 SelectionDAG &DAG) const {
10160 EVT VT = Op.getValueType();
10161 Op = Op.getOperand(0);
10162 EVT OpVT = Op.getValueType();
10163
10164 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
10165
10166 SDLoc DL(Op);
10167
10168 // load a 0 vector for the third operand of VSUM.
10169 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
10170
10171 // execute VSUM.
10172 switch (OpVT.getScalarSizeInBits()) {
10173 case 8:
10174 case 16:
10175 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
10176 [[fallthrough]];
10177 case 32:
10178 case 64:
10179 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
10180 DAG.getBitcast(Op.getValueType(), Zero));
10181 break;
10182 case 128:
10183 break; // VSUM over v1i128 should not happen and would be a noop
10184 default:
10185 llvm_unreachable("Unexpected scalar size.");
10186 }
10187 // Cast to original vector type, retrieve last element.
10188 return DAG.getNode(
10189 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
10190 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
10191}
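// As a worked illustration for a v16i8 reduction: the first VSUM forms a
// v4i32 of per-word byte sums, the second VSUM folds those words into one
// 128-bit total, and the final EXTRACT_VECTOR_ELT of element 15 of the
// v16i8 bitcast returns the low 8 bits of that total (SystemZ vectors are
// big-endian, so the last element is the least significant byte).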
10192
10193// A function is considered fully internal only if it has local linkage
10194// and is not used in any other way than acting as the called function at
10195// call sites.
10196bool SystemZTargetLowering::isFullyInternal(const Function *Fn) const {
10197 if (!Fn->hasLocalLinkage())
10198 return false;
10199 for (const User *U : Fn->users()) {
10200 if (auto *CB = dyn_cast<CallBase>(U)) {
10201 if (CB->getCalledFunction() != Fn)
10202 return false;
10203 } else
10204 return false;
10205 }
10206 return true;
10207}
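// Illustrative example (names are hypothetical): a function such as
//   static int helper(int x) { return x + 1; }
// used only as "helper(x)" at call sites is fully internal, while taking
// its address, e.g. "int (*fp)(int) = helper;", introduces a non-call use
// and the function is no longer considered fully internal.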
10208
10209static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS) {
10210 FunctionType *FT = F->getFunctionType();
10211 const AttributeList &Attrs = F->getAttributes();
10212 if (Attrs.hasRetAttrs())
10213 OS << Attrs.getAsString(AttributeList::ReturnIndex) << " ";
10214 OS << *F->getReturnType() << " @" << F->getName() << "(";
10215 for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
10216 if (I)
10217 OS << ", ";
10218 OS << *FT->getParamType(I);
10219 AttributeSet ArgAttrs = Attrs.getParamAttrs(I);
10220 for (auto A : {Attribute::SExt, Attribute::ZExt, Attribute::NoExt})
10221 if (ArgAttrs.hasAttribute(A))
10222 OS << " " << Attribute::getNameFromAttrKind(A);
10223 }
10224 OS << ")\n";
10225}
10226
10227void SystemZTargetLowering::
10228verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs,
10229 const Function *F, SDValue Callee) const {
10230 bool IsInternal = false;
10231 const Function *CalleeFn = nullptr;
10232 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
10233 if ((CalleeFn = dyn_cast<Function>(G->getGlobal())))
10234 IsInternal = isFullyInternal(CalleeFn);
10235 if (!verifyNarrowIntegerArgs(Outs, IsInternal)) {
10236 errs() << "ERROR: Missing extension attribute of passed "
10237 << "value in call to function:\n" << "Callee: ";
10238 if (CalleeFn != nullptr)
10239 printFunctionArgExts(CalleeFn, errs());
10240 else
10241 errs() << "-\n";
10242 errs() << "Caller: ";
10243 printFunctionArgExts(F, errs());
10244 llvm_unreachable("");
10245 }
10246}
10247
10248void SystemZTargetLowering::
10249verifyNarrowIntegerArgs_Ret(const SmallVectorImpl<ISD::OutputArg> &Outs,
10250 const Function *F) const {
10251 if (!verifyNarrowIntegerArgs(Outs, isFullyInternal(F))) {
10252 errs() << "ERROR: Missing extension attribute of returned "
10253 << "value from function:\n";
10254 printFunctionArgExts(F, errs());
10255 llvm_unreachable("");
10256 }
10257}
10258
10259// Verify that narrow integer arguments are extended as required by the ABI.
10260// Return false if an error is found.
10261bool SystemZTargetLowering::
10262verifyNarrowIntegerArgs(const SmallVectorImpl<ISD::OutputArg> &Outs,
10263 bool IsInternal) const {
10264 if (IsInternal || !Subtarget.isTargetELF())
10265 return true;
10266
10267 // Temporarily only do the check when explicitly requested, until it can be
10268 // enabled by default.
10270 return true;
10271
10274 return true;
10275 } else if (!getTargetMachine().Options.VerifyArgABICompliance)
10276 return true;
10277
10278 for (unsigned i = 0; i < Outs.size(); ++i) {
10279 MVT VT = Outs[i].VT;
10280 ISD::ArgFlagsTy Flags = Outs[i].Flags;
10281 if (VT.isInteger()) {
10282 assert((VT == MVT::i32 || VT.getSizeInBits() >= 64) &&
10283 "Unexpected integer argument VT.");
10284 if (VT == MVT::i32 &&
10285 !Flags.isSExt() && !Flags.isZExt() && !Flags.isNoExt())
10286 return false;
10287 }
10288 }
10289
10290 return true;
10291}
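// Illustrative note on the rule being enforced: under the SystemZ ELF ABI a
// narrow (i32) integer argument or return value is expected to carry an
// extension attribute, e.g.
//   declare void @use(i32 signext %x)   ; passes the check
//   declare void @use(i32 %x)           ; reported when the check is enabled
// which is what verifyNarrowIntegerArgs_Call/_Ret diagnose above.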
unsigned const MachineRegisterInfo * MRI
#define Success
unsigned Intr
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Addr
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
IRTranslator LLVM IR MI
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
uint64_t High
uint64_t IntrinsicInst * II
#define P(N)
static constexpr Register SPReg
static bool isSelectPseudo(MachineInstr &MI)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL, Comparison C, SDValue TrueOp, SDValue FalseOp)
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static cl::opt< bool > EnableIntArgExtCheck("argext-abi-check", cl::init(false), cl::desc("Verify that narrow int args are properly extended per the " "SystemZ ABI."))
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
#define OPCODE(NAME)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:229
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1386
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition: APInt.h:322
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:370
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
@ Add
*p = old + v
Definition: Instructions.h:720
@ Or
*p = old | v
Definition: Instructions.h:728
@ Sub
*p = old - v
Definition: Instructions.h:722
@ And
*p = old & v
Definition: Instructions.h:724
@ Xor
*p = old ^ v
Definition: Instructions.h:730
BinOp getOperation() const
Definition: Instructions.h:805
bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:392
static StringRef getNameFromAttrKind(Attribute::AttrKind AttrKind)
Definition: Attributes.cpp:314
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
The address of a basic block.
Definition: Constants.h:893
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
iterator end()
Definition: DenseMap.h:84
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:766
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:778
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:595
bool hasLocalLinkage() const
Definition: GlobalValue.h:529
bool hasPrivateLinkage() const
Definition: GlobalValue.h:528
bool hasInternalLinkage() const
Definition: GlobalValue.h:527
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setMaxCallFrameSize(uint64_t S)
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setReturnAddressIsTaken(bool s)
MachineFunctionProperties & reset(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getMachineOpcode() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:750
SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:499
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:801
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:503
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:760
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:856
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:827
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:712
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:890
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:498
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:700
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:796
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:873
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:510
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:767
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void reserve(size_type N)
Definition: SmallVector.h:663
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:470
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:684
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
iterator end() const
Definition: StringRef.h:118
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
A SystemZ-specific class detailing special use registers particular for calling conventions.
A SystemZ-specific constant pool value.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const
const TargetFrameLowering * getFrameLowering() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
const SystemZRegisterInfo * getRegisterInfo() const override
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
bool useSoftFloat() const override
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers Particular to z/OS when in 64 bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a wider type.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layout.
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
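A minimal sketch of renaming a libcall; the replacement symbol name here is hypothetical:

  // Assumes the target's runtime actually provides this symbol.
  setLibcallName(RTLIB::SREM_I64, "__mytarget_srem64");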
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what to do about it.
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save and restore.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom DAG combiner for by implementing the PerformDAGCombine virtual method.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate what to do about it.
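As a hedged sketch (again a hypothetical constructor fragment), extending loads and truncating stores are configured per value-type/memory-type pair:

  setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, MVT::i1, Promote);  // illustrative: promote i1 zero-extending loads
  setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i16, Legal);   // assumes a native 16->32 sign-extending load
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);              // assumes no truncating FP store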
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the data layout.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger size, needs to be expanded to some other code sequence, or the target has a custom expander for it.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the target instruction selector can accept natively.
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g. {edx}), return the register number and the register class for the register.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:228
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
iterator_range< user_iterator > users()
Definition: Value.h:421
int getNumOccurrences() const
Definition: CommandLine.h:399
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
self_iterator getIterator()
Definition: ilist_node.h:132
A raw_ostream that writes to a file descriptor.
Definition: raw_ostream.h:460
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual results.
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:153
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2*N], and return the full value as two results, each of type iN.
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1340
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1342
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1312
@ STRICT_FCEIL
Definition: ISDOpcodes.h:441
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1343
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store instruction.
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with side effects that does not return a result.
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
Definition: ISDOpcodes.h:1325
@ STRICT_FMINIMUM
Definition: ISDOpcodes.h:451
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
Definition: ISDOpcodes.h:841
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1299
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1304
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low lanes of an integer vector.
Definition: ISDOpcodes.h:871
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1338
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1339
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1494
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:465
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the resultant vector type.
Definition: ISDOpcodes.h:635
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:752
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1292
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1341
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1308
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant, which is required to be operand #1) half of the integer or float value specified as operand #0.
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the source.
Definition: ISDOpcodes.h:1222
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:445
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to +inf, 3 Round to -inf, 4 Round to nearest (ties away from zero).
Definition: ISDOpcodes.h:931
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of type iN, the top half of the full 2*N-bit product.
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1336
@ STRICT_FMAXIMUM
Definition: ISDOpcodes.h:450
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:439
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially variable) element number IDX.
Definition: ISDOpcodes.h:550
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:440
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) based on comparing operands #0 and #1 with the condition code in operand #4.
Definition: ISDOpcodes.h:772
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1344
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ ConstantPool
Definition: ISDOpcodes.h:82
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low lanes of an integer vector.
Definition: ISDOpcodes.h:860
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in a large integer register.
Definition: ISDOpcodes.h:849
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ STRICT_FROUND
Definition: ISDOpcodes.h:443
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector result.
Definition: ISDOpcodes.h:766
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:464
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:442
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1334
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:458
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
Definition: ISDOpcodes.h:480
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0.0.
Definition: ISDOpcodes.h:1050
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1335
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1253
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with no side effects.
Definition: ISDOpcodes.h:190
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amt) These correspond to the atomicrmw instruction.
Definition: ISDOpcodes.h:1333
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
Definition: ISDOpcodes.h:920
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low lanes of an integer vector.
Definition: ISDOpcodes.h:882
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition: ISDOpcodes.h:958
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:438
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.setjmp intrinsic.
Definition: ISDOpcodes.h:147
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1141
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero or sign extended from a narrower type.
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ AssertZext
Definition: ISDOpcodes.h:62
@ STRICT_FRINT
Definition: ISDOpcodes.h:437
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1398
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target intrinsic function with side effects that returns a result.
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified, possibly variable, elements.
Definition: ISDOpcodes.h:530
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 or undef.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
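A small sketch of how these two helpers relate a condition code to its negation and to its operand-swapped form:

  ISD::CondCode CC   = ISD::SETLT;                          // X < Y
  ISD::CondCode Inv  = ISD::getSetCCInverse(CC, MVT::i32);  // ISD::SETGE, i.e. !(X < Y)
  ISD::CondCode Swap = ISD::getSetCCSwappedOperands(CC);    // ISD::SETGT, i.e. Y > X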
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1610
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1590
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
ID ArrayRef< Type * > Tys
Definition: Intrinsics.h:102
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition: SystemZ.h:40
static bool isImmHH(uint64_t Val)
Definition: SystemZ.h:176
const unsigned CCMASK_TEND
Definition: SystemZ.h:97
const unsigned CCMASK_CS_EQ
Definition: SystemZ.h:67
const unsigned CCMASK_TBEGIN
Definition: SystemZ.h:92
const unsigned CCMASK_0
Definition: SystemZ.h:27
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition: SystemZ.h:82
const unsigned CCMASK_LOGICAL_CARRY
Definition: SystemZ.h:60
const unsigned TDCMASK_NORMAL_MINUS
Definition: SystemZ.h:122
const unsigned CCMASK_TDC
Definition: SystemZ.h:109
const unsigned CCMASK_FCMP
Definition: SystemZ.h:48
const unsigned CCMASK_TM_SOME_0
Definition: SystemZ.h:81
static bool isImmHL(uint64_t Val)
Definition: SystemZ.h:171
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition: SystemZ.h:124
const unsigned PFD_READ
Definition: SystemZ.h:115
const unsigned CCMASK_1
Definition: SystemZ.h:28
const unsigned TDCMASK_NORMAL_PLUS
Definition: SystemZ.h:121
const unsigned PFD_WRITE
Definition: SystemZ.h:116
const unsigned CCMASK_CMP_GT
Definition: SystemZ.h:37
const unsigned TDCMASK_QNAN_MINUS
Definition: SystemZ.h:128
const unsigned CCMASK_CS
Definition: SystemZ.h:69
const unsigned CCMASK_ANY
Definition: SystemZ.h:31
const unsigned CCMASK_ARITH
Definition: SystemZ.h:55
const unsigned CCMASK_TM_MIXED_MSB_0
Definition: SystemZ.h:78
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition: SystemZ.h:123
static bool isImmLL(uint64_t Val)
Definition: SystemZ.h:161
const unsigned VectorBits
Definition: SystemZ.h:154
static bool isImmLH(uint64_t Val)
Definition: SystemZ.h:166
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition: SystemZ.h:125
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition: SystemZ.h:77
const unsigned IPM_CC
Definition: SystemZ.h:112
const unsigned CCMASK_CMP_LE
Definition: SystemZ.h:39
const unsigned CCMASK_CMP_O
Definition: SystemZ.h:44
const unsigned CCMASK_CMP_EQ
Definition: SystemZ.h:35
const unsigned VectorBytes
Definition: SystemZ.h:158
const unsigned TDCMASK_INFINITY_MINUS
Definition: SystemZ.h:126
const unsigned CCMASK_ICMP
Definition: SystemZ.h:47
const unsigned CCMASK_VCMP_ALL
Definition: SystemZ.h:101
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition: SystemZ.h:104
const unsigned CCMASK_TM_MIXED_MSB_1
Definition: SystemZ.h:79
const unsigned CCMASK_TM_MSB_0
Definition: SystemZ.h:83
const unsigned CCMASK_ARITH_OVERFLOW
Definition: SystemZ.h:54
const unsigned CCMASK_CS_NE
Definition: SystemZ.h:68
const unsigned TDCMASK_SNAN_PLUS
Definition: SystemZ.h:129
const unsigned CCMASK_TM
Definition: SystemZ.h:85
const unsigned CCMASK_3
Definition: SystemZ.h:30
const unsigned CCMASK_CMP_LT
Definition: SystemZ.h:36
const unsigned CCMASK_CMP_NE
Definition: SystemZ.h:38
const unsigned TDCMASK_ZERO_PLUS
Definition: SystemZ.h:119
const unsigned TDCMASK_QNAN_PLUS
Definition: SystemZ.h:127
const unsigned TDCMASK_ZERO_MINUS
Definition: SystemZ.h:120
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition: SystemZ.h:80
const unsigned CCMASK_LOGICAL_BORROW
Definition: SystemZ.h:62
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition: SystemZ.h:43
const unsigned CCMASK_LOGICAL
Definition: SystemZ.h:64
const unsigned CCMASK_TM_MSB_1
Definition: SystemZ.h:84
const unsigned TDCMASK_SNAN_MINUS
Definition: SystemZ.h:130
@ GeneralDynamic
Definition: CodeGen.h:46
@ GS
Definition: X86.h:210
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
support::ulittle32_t Word
Definition: IRSymtab.h:52
NodeAddr< CodeNode * > Code
Definition: RDFGraph.h:388
constexpr const char32_t SBase
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:355
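A quick illustration of the rounding behaviour:

  unsigned A = llvm::Log2_32_Ceil(32);  // 5 (exact power of two)
  unsigned B = llvm::Log2_32_Ceil(33);  // 6 (rounded up)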
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
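A minimal sketch of the builder pattern, assuming an insertion point MI inside a basic block MBB, a TargetInstrInfo pointer TII, and virtual registers DstReg/SrcReg already in scope; it uses the target-independent COPY opcode:

  // Insert "DstReg = COPY SrcReg" immediately before MI.
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), DstReg)
      .addReg(SrcReg);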
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:257
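For example:

  bool A = llvm::isUIntN(12, 4095);  // true: fits in 12 unsigned bits
  bool B = llvm::isUIntN(12, 4096);  // false: needs 13 bits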
@ Done
Definition: Threading.h:61
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert ‘Bytes’ to a hex string and output to ‘OS’.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most, stopping at the first 1.
Definition: bit.h:215
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least, stopping at the first 1.
Definition: bit.h:281
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:293
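A small, self-contained illustration of these bit utilities (the value is arbitrary):

  #include "llvm/ADT/bit.h"
  #include "llvm/Support/MathExtras.h"
  #include <cstdint>

  uint32_t V  = 40;                     // 0b101000
  int TZ      = llvm::countr_zero(V);   // 3
  int LZ      = llvm::countl_zero(V);   // 26
  uint32_t Up = llvm::bit_ceil(V);      // 64
  bool P2     = llvm::isPowerOf2_32(V); // false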
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ Mul
Product of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:583
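For example (MathExtras.h provides both a templated and a runtime-width form):

  int64_t A = llvm::SignExtend64<16>(0xFFFF);  // -1
  int64_t B = llvm::SignExtend64(0x8000, 16);  // -32768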
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define EQ(a, b)
Definition: regexec.c:112
#define NC
Definition: regutils.h:42
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:243
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:178
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:164
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:73
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:303
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:172
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:137
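A short sketch of composing these KnownBits queries (the bit pattern is arbitrary):

  #include "llvm/Support/KnownBits.h"

  llvm::KnownBits K(8);
  K.Zero.setHighBits(4);               // top nibble known to be 0
  K.One.setBit(0);                     // lowest bit known to be 1
  llvm::KnownBits W = K.zext(16);      // the new high bits become known-zero
  llvm::APInt Max   = K.getMaxValue(); // 0b00001111 == 15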
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
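A hedged sketch of attaching pointer info when lowering a load from a frame index; FI, FIPtr, Chain, DL, and DAG are assumed to already exist in the surrounding lowering code:

  SDValue L = DAG.getLoad(MVT::i64, DL, Chain, FIPtr,
                          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));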
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
const uint32_t * getNoPreservedMask() const override
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
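These setters are usually chained when a backend builds a call or libcall by hand. A minimal sketch, assuming Chain, Callee, Args, RetTy, DL, and DAG are in scope inside a TargetLowering member function:

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
     .setChain(Chain)
     .setCallee(CallingConv::C, RetTy, Callee, std::move(Args))
     .setSExtResult();                                          // request sign-extension of a narrow result
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);    // returns {result value, chain}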