1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
18#include "llvm/ADT/SmallSet.h"
24#include "llvm/IR/GlobalAlias.h"
26#include "llvm/IR/Intrinsics.h"
27#include "llvm/IR/IntrinsicsS390.h"
32#include <cctype>
33#include <optional>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "systemz-lower"
38
39// Temporarily let this be disabled by default until all known problems
40// related to argument extensions are fixed.
42 "argext-abi-check", cl::init(false),
43 cl::desc("Verify that narrow int args are properly extended per the "
44 "SystemZ ABI."));
45
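// Illustrative note (not part of the original source): because the option
// above is a cl::opt, the check can be enabled from the command line of any
// tool that links this backend, for example:
//
//   llc -mtriple=s390x-linux-gnu -argext-abi-check foo.ll
//
// It defaults to off, as the comment above explains.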
46namespace {
47// Represents information about a comparison.
48struct Comparison {
49 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
50 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
51 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
52
53 // The operands to the comparison.
54 SDValue Op0, Op1;
55
56 // Chain if this is a strict floating-point comparison.
57 SDValue Chain;
58
59 // The opcode that should be used to compare Op0 and Op1.
60 unsigned Opcode;
61
62 // A SystemZICMP value. Only used for integer comparisons.
63 unsigned ICmpType;
64
65 // The mask of CC values that Opcode can produce.
66 unsigned CCValid;
67
68 // The mask of CC values for which the original condition is true.
69 unsigned CCMask;
70};
71} // end anonymous namespace
72
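// Illustrative sketch (not part of the original source): for a signed 32-bit
// equality test "a == b", a Comparison would typically be filled in roughly
// as follows, assuming the usual SystemZ condition-code mask constants from
// SystemZ.h:
//
//   Comparison C(A, B, SDValue());          // no chain: not a strict FP cmp
//   C.Opcode   = SystemZISD::ICMP;          // integer compare node
//   C.ICmpType = SystemZICMP::SignedOnly;   // signedness of the compare
//   C.CCValid  = SystemZ::CCMASK_ICMP;      // CC values ICMP can produce
//   C.CCMask   = SystemZ::CCMASK_CMP_EQ;    // CC values meaning "equal"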
73// Classify VT as either 32 or 64 bit.
74static bool is32Bit(EVT VT) {
75 switch (VT.getSimpleVT().SimpleTy) {
76 case MVT::i32:
77 return true;
78 case MVT::i64:
79 return false;
80 default:
81 llvm_unreachable("Unsupported type");
82 }
83}
84
85// Return a version of MachineOperand that can be safely used before the
86// final use.
88 if (Op.isReg())
89 Op.setIsKill(false);
90 return Op;
91}
92
94 const SystemZSubtarget &STI)
95 : TargetLowering(TM, STI), Subtarget(STI) {
96 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
97
98 auto *Regs = STI.getSpecialRegisters();
99
100 // Set up the register classes.
101 if (Subtarget.hasHighWord())
102 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
103 else
104 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
105 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
106 if (!useSoftFloat()) {
107 if (Subtarget.hasVector()) {
108 addRegisterClass(MVT::f16, &SystemZ::VR16BitRegClass);
109 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
110 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
111 } else {
112 addRegisterClass(MVT::f16, &SystemZ::FP16BitRegClass);
113 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
114 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
115 }
116 if (Subtarget.hasVectorEnhancements1())
117 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
118 else
119 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
120
121 if (Subtarget.hasVector()) {
122 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
123 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
124 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
125 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
126 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
127 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
128 }
129
130 if (Subtarget.hasVector())
131 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
132 }
133
134 // Compute derived properties from the register classes
135 computeRegisterProperties(Subtarget.getRegisterInfo());
136
137 // Set up special registers.
138 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
139
 140 // TODO: It may be better to default to latency-oriented scheduling;
 141 // however, LLVM's current latency-oriented scheduler can't handle physreg
 142 // definitions such as SystemZ has with CC, so set this to the
 143 // register-pressure scheduler, which can.
145
148
150
151 // Instructions are strings of 2-byte aligned 2-byte values.
153 // For performance reasons we prefer 16-byte alignment.
155
156 // Handle operations that are handled in a similar way for all types.
157 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
158 I <= MVT::LAST_FP_VALUETYPE;
159 ++I) {
161 if (isTypeLegal(VT)) {
162 // Lower SET_CC into an IPM-based sequence.
166
167 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
169
170 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
173 }
174 }
175
176 // Expand jump table branches as address arithmetic followed by an
177 // indirect jump.
179
180 // Expand BRCOND into a BR_CC (see above).
182
183 // Handle integer types except i128.
184 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
185 I <= MVT::LAST_INTEGER_VALUETYPE;
186 ++I) {
188 if (isTypeLegal(VT) && VT != MVT::i128) {
190
191 // Expand individual DIV and REMs into DIVREMs.
198
199 // Support addition/subtraction with overflow.
202
203 // Support addition/subtraction with carry.
206
207 // Support carry in as value rather than glue.
210
211 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
212 // available, or if the operand is constant.
214
215 // Use POPCNT on z196 and above.
216 if (Subtarget.hasPopulationCount())
218 else
220
221 // No special instructions for these.
224
225 // Use *MUL_LOHI where possible instead of MULH*.
230
231 // The fp<=>i32/i64 conversions are all Legal except for f16 and for
232 // unsigned on z10 (only z196 and above have native support for
233 // unsigned conversions).
240 // Handle unsigned 32-bit input types as signed 64-bit types on z10.
241 auto OpAction =
242 (!Subtarget.hasFPExtension() && VT == MVT::i32) ? Promote : Custom;
243 setOperationAction(Op, VT, OpAction);
244 }
245 }
246 }
247
248 // Handle i128 if legal.
249 if (isTypeLegal(MVT::i128)) {
250 // No special instructions for these.
257
258 // We may be able to use VSLDB/VSLD/VSRD for these.
261
262 // No special instructions for these before z17.
263 if (!Subtarget.hasVectorEnhancements3()) {
273 } else {
274 // Even if we do have a legal 128-bit multiply, we do not
275 // want 64-bit multiply-high operations to use it.
278 }
279
280 // Support addition/subtraction with carry.
285
286 // Use VPOPCT and add up partial results.
288
289 // Additional instructions available with z17.
290 if (Subtarget.hasVectorEnhancements3()) {
291 setOperationAction(ISD::ABS, MVT::i128, Legal);
292
294 MVT::i128, Legal);
295 }
296 }
297
 298 // These need custom lowering in order to handle the f16 conversions.
307
308 // Type legalization will convert 8- and 16-bit atomic operations into
309 // forms that operate on i32s (but still keeping the original memory VT).
310 // Lower them into full i32 operations.
322
 323 // Whether or not i128 is a legal type, we need to custom lower
 324 // the atomic operations in order to exploit SystemZ instructions.
329
330 // Mark sign/zero extending atomic loads as legal, which will make
331 // DAGCombiner fold extensions into atomic loads if possible.
333 {MVT::i8, MVT::i16, MVT::i32}, Legal);
335 {MVT::i8, MVT::i16}, Legal);
337 MVT::i8, Legal);
338
339 // We can use the CC result of compare-and-swap to implement
340 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
344
346
347 // Traps are legal, as we will convert them to "j .+2".
348 setOperationAction(ISD::TRAP, MVT::Other, Legal);
349
350 // We have native support for a 64-bit CTLZ, via FLOGR.
354
355 // On z17 we have native support for a 64-bit CTTZ.
356 if (Subtarget.hasMiscellaneousExtensions4()) {
360 }
361
362 // On z15 we have native support for a 64-bit CTPOP.
363 if (Subtarget.hasMiscellaneousExtensions3()) {
366 }
367
368 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
370
 371 // Expand 128-bit shifts without using a libcall.
375
 376 // Also expand 256-bit shifts if i128 is a legal type.
377 if (isTypeLegal(MVT::i128)) {
381 }
382
383 // Handle bitcast from fp128 to i128.
384 if (!isTypeLegal(MVT::i128))
386
387 // We have native instructions for i8, i16 and i32 extensions, but not i1.
389 for (MVT VT : MVT::integer_valuetypes()) {
393 }
394
395 // Handle the various types of symbolic address.
401
402 // We need to handle dynamic allocations specially because of the
403 // 160-byte area at the bottom of the stack.
406
409
410 // Handle prefetches with PFD or PFDRL.
412
413 // Handle readcyclecounter with STCKF.
415
417 // Assume by default that all vector operations need to be expanded.
418 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
419 if (getOperationAction(Opcode, VT) == Legal)
420 setOperationAction(Opcode, VT, Expand);
421
422 // Likewise all truncating stores and extending loads.
423 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
424 setTruncStoreAction(VT, InnerVT, Expand);
427 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
428 }
429
430 if (isTypeLegal(VT)) {
431 // These operations are legal for anything that can be stored in a
432 // vector register, even if there is no native support for the format
433 // as such. In particular, we can do these for v4f32 even though there
434 // are no specific instructions for that format.
440
441 // Likewise, except that we need to replace the nodes with something
442 // more specific.
445 }
446 }
447
448 // Handle integer vector types.
450 if (isTypeLegal(VT)) {
451 // These operations have direct equivalents.
456 if (VT != MVT::v2i64 || Subtarget.hasVectorEnhancements3()) {
460 }
461 if (Subtarget.hasVectorEnhancements3() &&
462 VT != MVT::v16i8 && VT != MVT::v8i16) {
467 }
472 if (Subtarget.hasVectorEnhancements1())
474 else
478
479 // Convert a GPR scalar to a vector by inserting it into element 0.
481
482 // Use a series of unpacks for extensions.
485
486 // Detect shifts/rotates by a scalar amount and convert them into
487 // V*_BY_SCALAR.
492
493 // Add ISD::VECREDUCE_ADD as custom in order to implement
494 // it with VZERO+VSUM
496
497 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
498 // and inverting the result as necessary.
500
502 Legal);
503 }
504 }
505
506 if (Subtarget.hasVector()) {
507 // There should be no need to check for float types other than v2f64
508 // since <2 x f32> isn't a legal type.
517
526 }
527
528 if (Subtarget.hasVectorEnhancements2()) {
537
546 }
547
548 // Handle floating-point types.
549 if (!useSoftFloat()) {
550 // Promote all f16 operations to float, with some exceptions below.
551 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
552 setOperationAction(Opc, MVT::f16, Promote);
554 for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
555 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
556 setTruncStoreAction(VT, MVT::f16, Expand);
557 }
559 setOperationAction(Op, MVT::f16, Subtarget.hasVector() ? Legal : Custom);
564 for (auto Op : {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN})
565 setOperationAction(Op, MVT::f16, Legal);
566 }
567
568 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
569 I <= MVT::LAST_FP_VALUETYPE;
570 ++I) {
572 if (isTypeLegal(VT) && VT != MVT::f16) {
573 // We can use FI for FRINT.
575
576 // We can use the extended form of FI for other rounding operations.
577 if (Subtarget.hasFPExtension()) {
584 }
585
586 // No special instructions for these.
592
593 // Special treatment.
595
596 // Handle constrained floating-point operations.
605 if (Subtarget.hasFPExtension()) {
612 }
613
614 // Extension from f16 needs libcall.
617 }
618 }
619
620 // Handle floating-point vector types.
621 if (Subtarget.hasVector()) {
622 // Scalar-to-vector conversion is just a subreg.
625
626 // Some insertions and extractions can be done directly but others
627 // need to go via integers.
632
633 // These operations have direct equivalents.
634 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
635 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
636 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
637 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
638 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
639 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
640 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
641 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
642 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
645 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
649
650 // Handle constrained floating-point operations.
664
669 if (Subtarget.hasVectorEnhancements1()) {
672 }
673 }
674
675 // The vector enhancements facility 1 has instructions for these.
676 if (Subtarget.hasVectorEnhancements1()) {
677 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
678 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
679 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
680 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
681 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
682 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
683 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
684 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
685 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
688 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
692
697
702
707
712
717
718 // Handle constrained floating-point operations.
732 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
733 MVT::v4f32, MVT::v2f64 }) {
738 }
739 }
740
741 // We only have fused f128 multiply-addition on vector registers.
742 if (!Subtarget.hasVectorEnhancements1()) {
745 }
746
747 // We don't have a copysign instruction on vector registers.
748 if (Subtarget.hasVectorEnhancements1())
750
751 // Needed so that we don't try to implement f128 constant loads using
 752 // a load-and-extend of an f80 constant (in cases where the constant
753 // would fit in an f80).
754 for (MVT VT : MVT::fp_valuetypes())
755 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
756
 757 // We don't have extending load instructions on vector registers.
758 if (Subtarget.hasVectorEnhancements1()) {
759 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
760 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
761 }
762
763 // Floating-point truncation and stores need to be done separately.
764 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
765 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
766 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
767
768 // We have 64-bit FPR<->GPR moves, but need special handling for
769 // 32-bit forms.
770 if (!Subtarget.hasVector()) {
773 }
774
775 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
776 // structure, but VAEND is a no-op.
780
781 if (Subtarget.isTargetzOS()) {
782 // Handle address space casts between mixed sized pointers.
785 }
786
788
789 // Codes for which we want to perform some z-specific combinations.
793 ISD::LOAD,
806 ISD::SRL,
807 ISD::SRA,
808 ISD::MUL,
809 ISD::SDIV,
810 ISD::UDIV,
811 ISD::SREM,
812 ISD::UREM,
815
816 // Handle intrinsics.
819
 820 // We're not using SJLJ for exception handling, but the SJLJ nodes are
 821 // implemented solely to support use of __builtin_setjmp / __builtin_longjmp.
824
825 // We want to use MVC in preference to even a single load/store pair.
826 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
828
829 // The main memset sequence is a byte store followed by an MVC.
830 // Two STC or MV..I stores win over that, but the kind of fused stores
831 // generated by target-independent code don't when the byte value is
832 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
833 // than "STC;MVC". Handle the choice in target-specific code instead.
834 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
836
837 // Default to having -disable-strictnode-mutation on
838 IsStrictFPEnabled = true;
839}
840
842 return Subtarget.hasSoftFloat();
843}
844
846 LLVMContext &, EVT VT) const {
847 if (!VT.isVector())
848 return MVT::i32;
850}
851
853 const MachineFunction &MF, EVT VT) const {
854 if (useSoftFloat())
855 return false;
856
857 VT = VT.getScalarType();
858
859 if (!VT.isSimple())
860 return false;
861
862 switch (VT.getSimpleVT().SimpleTy) {
863 case MVT::f32:
864 case MVT::f64:
865 return true;
866 case MVT::f128:
867 return Subtarget.hasVectorEnhancements1();
868 default:
869 break;
870 }
871
872 return false;
873}
874
875// Return true if the constant can be generated with a vector instruction,
876// such as VGM, VGMB or VREPI.
878 const SystemZSubtarget &Subtarget) {
879 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
880 if (!Subtarget.hasVector() ||
881 (isFP128 && !Subtarget.hasVectorEnhancements1()))
882 return false;
883
884 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
885 // preferred way of creating all-zero and all-one vectors so give it
886 // priority over other methods below.
887 unsigned Mask = 0;
888 unsigned I = 0;
889 for (; I < SystemZ::VectorBytes; ++I) {
890 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
891 if (Byte == 0xff)
892 Mask |= 1ULL << I;
893 else if (Byte != 0)
894 break;
895 }
896 if (I == SystemZ::VectorBytes) {
897 Opcode = SystemZISD::BYTE_MASK;
898 OpVals.push_back(Mask);
900 return true;
901 }
902
903 if (SplatBitSize > 64)
904 return false;
905
906 auto TryValue = [&](uint64_t Value) -> bool {
907 // Try VECTOR REPLICATE IMMEDIATE
908 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
909 if (isInt<16>(SignedValue)) {
910 OpVals.push_back(((unsigned) SignedValue));
911 Opcode = SystemZISD::REPLICATE;
913 SystemZ::VectorBits / SplatBitSize);
914 return true;
915 }
916 // Try VECTOR GENERATE MASK
917 unsigned Start, End;
918 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
919 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
920 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
 921 // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
922 OpVals.push_back(Start - (64 - SplatBitSize));
923 OpVals.push_back(End - (64 - SplatBitSize));
924 Opcode = SystemZISD::ROTATE_MASK;
926 SystemZ::VectorBits / SplatBitSize);
927 return true;
928 }
929 return false;
930 };
931
932 // First try assuming that any undefined bits above the highest set bit
933 // and below the lowest set bit are 1s. This increases the likelihood of
934 // being able to use a sign-extended element value in VECTOR REPLICATE
935 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
936 uint64_t SplatBitsZ = SplatBits.getZExtValue();
937 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
938 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
939 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
940 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
941 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
942 if (TryValue(SplatBitsZ | Upper | Lower))
943 return true;
944
945 // Now try assuming that any undefined bits between the first and
946 // last defined set bits are set. This increases the chances of
947 // using a non-wraparound mask.
948 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
949 return TryValue(SplatBitsZ | Middle);
950}
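// Illustrative worked example (not part of the original source): a constant
// whose bytes are all 0xff sets every bit of Mask above, so it is emitted as
// SystemZISD::BYTE_MASK with OpVals = {0xffff}. A 16-bit splat of the value 1
// passes the isInt<16> test in TryValue, so it becomes SystemZISD::REPLICATE
// with OpVals = {1} and a v8i16 vector type
// (SystemZ::VectorBits / 16 == 8 elements).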
951
953 if (IntImm.isSingleWord()) {
954 IntBits = APInt(128, IntImm.getZExtValue());
955 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
956 } else
957 IntBits = IntImm;
958 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
959
960 // Find the smallest splat.
961 SplatBits = IntImm;
962 unsigned Width = SplatBits.getBitWidth();
963 while (Width > 8) {
964 unsigned HalfSize = Width / 2;
965 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
966 APInt LowValue = SplatBits.trunc(HalfSize);
967
968 // If the two halves do not match, stop here.
969 if (HighValue != LowValue || 8 > HalfSize)
970 break;
971
972 SplatBits = HighValue;
973 Width = HalfSize;
974 }
975 SplatUndef = 0;
976 SplatBitSize = Width;
977}
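// Illustrative worked example (not part of the original source): for the
// 32-bit value 0x00010001 the loop above halves the width while both halves
// agree: 0x00010001 -> 0x0001, giving SplatBits = 1 and SplatBitSize = 16
// (the next halving stops because the 8-bit halves 0x00 and 0x01 differ).
// A constant made of repeated 0xff bytes narrows all the way down to
// SplatBitSize = 8.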
978
980 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
981 bool HasAnyUndefs;
982
 983 // Get IntBits by finding the 128-bit splat.
984 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
985 true);
986
 987 // Get SplatBits by finding the 8-bit or greater splat.
988 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
989 true);
990}
991
993 bool ForCodeSize) const {
994 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
995 if (Imm.isZero() || Imm.isNegZero())
996 return true;
997
999}
1000
1003 MachineBasicBlock *MBB) const {
1004 DebugLoc DL = MI.getDebugLoc();
1005 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1006 const SystemZRegisterInfo *TRI = Subtarget.getRegisterInfo();
1007
1008 MachineFunction *MF = MBB->getParent();
1010
1011 const BasicBlock *BB = MBB->getBasicBlock();
1012 MachineFunction::iterator I = ++MBB->getIterator();
1013
1014 Register DstReg = MI.getOperand(0).getReg();
1015 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
1016 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
1017 (void)TRI;
1018 Register MainDstReg = MRI.createVirtualRegister(RC);
1019 Register RestoreDstReg = MRI.createVirtualRegister(RC);
1020
1021 MVT PVT = getPointerTy(MF->getDataLayout());
1022 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
 1023 // For v = setjmp(buf), we generate the following:
1024 // Algorithm:
1025 //
1026 // ---------
1027 // | thisMBB |
1028 // ---------
1029 // |
1030 // ------------------------
1031 // | |
1032 // ---------- ---------------
1033 // | mainMBB | | restoreMBB |
1034 // | v = 0 | | v = 1 |
1035 // ---------- ---------------
1036 // | |
1037 // -------------------------
1038 // |
1039 // -----------------------------
1040 // | sinkMBB |
1041 // | phi(v_mainMBB,v_restoreMBB) |
1042 // -----------------------------
1043 // thisMBB:
1044 // buf[FPOffset] = Frame Pointer if hasFP.
1045 // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB.
1046 // buf[BCOffset] = Backchain value if building with -mbackchain.
1047 // buf[SPOffset] = Stack Pointer.
 1048 // buf[LPOffset] = Not written here; gcc always stores R13 in this slot.
1049 // SjLjSetup restoreMBB
1050 // mainMBB:
1051 // v_main = 0
1052 // sinkMBB:
1053 // v = phi(v_main, v_restore)
1054 // restoreMBB:
1055 // v_restore = 1
1056
1057 MachineBasicBlock *ThisMBB = MBB;
1058 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
1059 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
1060 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
1061
1062 MF->insert(I, MainMBB);
1063 MF->insert(I, SinkMBB);
1064 MF->push_back(RestoreMBB);
1065 RestoreMBB->setMachineBlockAddressTaken();
1066
1068
1069 // Transfer the remainder of BB and its successor edges to sinkMBB.
1070 SinkMBB->splice(SinkMBB->begin(), MBB,
1071 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
1073
1074 // thisMBB:
1075 const int64_t FPOffset = 0; // Slot 1.
1076 const int64_t LabelOffset = 1 * PVT.getStoreSize(); // Slot 2.
1077 const int64_t BCOffset = 2 * PVT.getStoreSize(); // Slot 3.
1078 const int64_t SPOffset = 3 * PVT.getStoreSize(); // Slot 4.
1079
1080 // Buf address.
1081 Register BufReg = MI.getOperand(1).getReg();
1082
1083 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
1084 Register LabelReg = MRI.createVirtualRegister(PtrRC);
1085
1086 // Prepare IP for longjmp.
1087 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LARL), LabelReg)
1088 .addMBB(RestoreMBB);
1089 // Store IP for return from jmp, slot 2, offset = 1.
1090 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1091 .addReg(LabelReg)
1092 .addReg(BufReg)
1093 .addImm(LabelOffset)
1094 .addReg(0);
1095
1096 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1097 bool HasFP = Subtarget.getFrameLowering()->hasFP(*MF);
1098 if (HasFP) {
1099 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1100 .addReg(SpecialRegs->getFramePointerRegister())
1101 .addReg(BufReg)
1102 .addImm(FPOffset)
1103 .addReg(0);
1104 }
1105
1106 // Store SP.
1107 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1108 .addReg(SpecialRegs->getStackPointerRegister())
1109 .addReg(BufReg)
1110 .addImm(SPOffset)
1111 .addReg(0);
1112
 1113 // Slot 3 (Offset = 2): Backchain value (if building with -mbackchain).
1114 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1115 if (BackChain) {
1116 Register BCReg = MRI.createVirtualRegister(PtrRC);
1117 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1118 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1119 .addReg(SpecialRegs->getStackPointerRegister())
1120 .addImm(TFL->getBackchainOffset(*MF))
1121 .addReg(0);
1122
1123 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1124 .addReg(BCReg)
1125 .addReg(BufReg)
1126 .addImm(BCOffset)
1127 .addReg(0);
1128 }
1129
1130 // Setup.
1131 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::EH_SjLj_Setup))
1132 .addMBB(RestoreMBB);
1133
1134 const SystemZRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1135 MIB.addRegMask(RegInfo->getNoPreservedMask());
1136
1137 ThisMBB->addSuccessor(MainMBB);
1138 ThisMBB->addSuccessor(RestoreMBB);
1139
1140 // mainMBB:
1141 BuildMI(MainMBB, DL, TII->get(SystemZ::LHI), MainDstReg).addImm(0);
1142 MainMBB->addSuccessor(SinkMBB);
1143
1144 // sinkMBB:
1145 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(SystemZ::PHI), DstReg)
1146 .addReg(MainDstReg)
1147 .addMBB(MainMBB)
1148 .addReg(RestoreDstReg)
1149 .addMBB(RestoreMBB);
1150
1151 // restoreMBB.
1152 BuildMI(RestoreMBB, DL, TII->get(SystemZ::LHI), RestoreDstReg).addImm(1);
1153 BuildMI(RestoreMBB, DL, TII->get(SystemZ::J)).addMBB(SinkMBB);
1154 RestoreMBB->addSuccessor(SinkMBB);
1155
1156 MI.eraseFromParent();
1157
1158 return SinkMBB;
1159}
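// Illustrative note (not part of the original source): with 64-bit pointers
// PVT.getStoreSize() is 8, so the jmp_buf slots written above sit at byte
// offsets 0 (frame pointer), 8 (return label), 16 (backchain) and 24 (stack
// pointer); the fifth slot at offset 32 (LPOffset in the longjmp lowering
// below) is the R13 slot that only gcc writes.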
1160
1163 MachineBasicBlock *MBB) const {
1164
1165 DebugLoc DL = MI.getDebugLoc();
1166 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1167
1168 MachineFunction *MF = MBB->getParent();
1170
1171 MVT PVT = getPointerTy(MF->getDataLayout());
1172 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1173 Register BufReg = MI.getOperand(0).getReg();
1174 const TargetRegisterClass *RC = MRI.getRegClass(BufReg);
1175 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1176
1177 Register Tmp = MRI.createVirtualRegister(RC);
1178 Register BCReg = MRI.createVirtualRegister(RC);
1179
1181
1182 const int64_t FPOffset = 0;
1183 const int64_t LabelOffset = 1 * PVT.getStoreSize();
1184 const int64_t BCOffset = 2 * PVT.getStoreSize();
1185 const int64_t SPOffset = 3 * PVT.getStoreSize();
1186 const int64_t LPOffset = 4 * PVT.getStoreSize();
1187
1188 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), Tmp)
1189 .addReg(BufReg)
1190 .addImm(LabelOffset)
1191 .addReg(0);
1192
1193 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1194 SpecialRegs->getFramePointerRegister())
1195 .addReg(BufReg)
1196 .addImm(FPOffset)
1197 .addReg(0);
1198
 1199 // We are restoring R13 even though we never stored it in setjmp from llvm,
 1200 // as gcc always stores R13 in builtin_setjmp. We could be dealing with
 1201 // mixed code: gcc setjmp and llvm longjmp.
1202 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), SystemZ::R13D)
1203 .addReg(BufReg)
1204 .addImm(LPOffset)
1205 .addReg(0);
1206
1207 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1208 if (BackChain) {
1209 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1210 .addReg(BufReg)
1211 .addImm(BCOffset)
1212 .addReg(0);
1213 }
1214
1215 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1216 SpecialRegs->getStackPointerRegister())
1217 .addReg(BufReg)
1218 .addImm(SPOffset)
1219 .addReg(0);
1220
1221 if (BackChain) {
1222 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1223 BuildMI(*MBB, MI, DL, TII->get(SystemZ::STG))
1224 .addReg(BCReg)
1225 .addReg(SpecialRegs->getStackPointerRegister())
1226 .addImm(TFL->getBackchainOffset(*MF))
1227 .addReg(0);
1228 }
1229
1230 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BR)).addReg(Tmp);
1231
1232 MI.eraseFromParent();
1233 return MBB;
1234}
1235
1236/// Returns true if stack probing through inline assembly is requested.
1238 // If the function specifically requests inline stack probes, emit them.
1239 if (MF.getFunction().hasFnAttribute("probe-stack"))
1240 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
1241 "inline-asm";
1242 return false;
1243}
1244
1249
1254
1257 const AtomicRMWInst *RMW) const {
1258 // Don't expand subword operations as they require special treatment.
1259 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
1261
1262 // Don't expand if there is a target instruction available.
1263 if (Subtarget.hasInterlockedAccess1() &&
1264 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
1271
1273}
1274
1276 // We can use CGFI or CLGFI.
1277 return isInt<32>(Imm) || isUInt<32>(Imm);
1278}
1279
1281 // We can use ALGFI or SLGFI.
1282 return isUInt<32>(Imm) || isUInt<32>(-Imm);
1283}
1284
1286 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
1287 // Unaligned accesses should never be slower than the expanded version.
1288 // We check specifically for aligned accesses in the few cases where
1289 // they are required.
1290 if (Fast)
1291 *Fast = 1;
1292 return true;
1293}
1294
1296 EVT VT = Y.getValueType();
1297
1298 // We can use NC(G)RK for types in GPRs ...
1299 if (VT == MVT::i32 || VT == MVT::i64)
1300 return Subtarget.hasMiscellaneousExtensions3();
1301
1302 // ... or VNC for types in VRs.
1303 if (VT.isVector() || VT == MVT::i128)
1304 return Subtarget.hasVector();
1305
1306 return false;
1307}
1308
1309// Information about the addressing mode for a memory access.
1311 // True if a long displacement is supported.
1313
1314 // True if use of index register is supported.
1316
1317 AddressingMode(bool LongDispl, bool IdxReg) :
1318 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
1319};
1320
1321// Return the desired addressing mode for a Load whose only use (in the
1322// same block) is a Store.
1324 Type *Ty) {
 1325 // With vector support, a Load->Store combination may be combined into
 1326 // either an MVC or vector operations, and it seems to work best to allow
 1327 // the vector addressing mode.
1328 if (HasVector)
1329 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1330
1331 // Otherwise only the MVC case is special.
1332 bool MVC = Ty->isIntegerTy(8);
1333 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1334}
1335
1336// Return the addressing mode which seems most desirable given an LLVM
1337// Instruction pointer.
1338static AddressingMode
1341 switch (II->getIntrinsicID()) {
1342 default: break;
1343 case Intrinsic::memset:
1344 case Intrinsic::memmove:
1345 case Intrinsic::memcpy:
1346 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1347 }
1348 }
1349
1350 if (isa<LoadInst>(I) && I->hasOneUse()) {
1351 auto *SingleUser = cast<Instruction>(*I->user_begin());
1352 if (SingleUser->getParent() == I->getParent()) {
1353 if (isa<ICmpInst>(SingleUser)) {
1354 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1355 if (C->getBitWidth() <= 64 &&
1356 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
 1357 // Comparison of memory with a 16-bit signed / unsigned immediate
1358 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1359 } else if (isa<StoreInst>(SingleUser))
1360 // Load->Store
1361 return getLoadStoreAddrMode(HasVector, I->getType());
1362 }
1363 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1364 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1365 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1366 // Load->Store
1367 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1368 }
1369
1370 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1371
1372 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1373 // dependencies (LDE only supports small offsets).
1374 // * Utilize the vector registers to hold floating point
1375 // values (vector load / store instructions only support small
1376 // offsets).
1377
1378 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1379 I->getOperand(0)->getType());
1380 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1381 bool IsVectorAccess = MemAccessTy->isVectorTy();
1382
1383 // A store of an extracted vector element will be combined into a VSTE type
1384 // instruction.
1385 if (!IsVectorAccess && isa<StoreInst>(I)) {
1386 Value *DataOp = I->getOperand(0);
1387 if (isa<ExtractElementInst>(DataOp))
1388 IsVectorAccess = true;
1389 }
1390
1391 // A load which gets inserted into a vector element will be combined into a
1392 // VLE type instruction.
1393 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1394 User *LoadUser = *I->user_begin();
1395 if (isa<InsertElementInst>(LoadUser))
1396 IsVectorAccess = true;
1397 }
1398
1399 if (IsFPAccess || IsVectorAccess)
1400 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1401 }
1402
1403 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1404}
1405
1407 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1408 // Punt on globals for now, although they can be used in limited
1409 // RELATIVE LONG cases.
1410 if (AM.BaseGV)
1411 return false;
1412
1413 // Require a 20-bit signed offset.
1414 if (!isInt<20>(AM.BaseOffs))
1415 return false;
1416
1417 bool RequireD12 =
1418 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1419 AddressingMode SupportedAM(!RequireD12, true);
1420 if (I != nullptr)
1421 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1422
1423 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1424 return false;
1425
1426 if (!SupportedAM.IndexReg)
1427 // No indexing allowed.
1428 return AM.Scale == 0;
1429 else
1430 // Indexing is OK but no scale factor can be applied.
1431 return AM.Scale == 0 || AM.Scale == 1;
1432}
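// Illustrative examples (not part of the original source): "base + 100000"
// is accepted because 100000 fits in a signed 20-bit displacement, and
// "base + index" is accepted since Scale == 1; by contrast "base + 4*index"
// is rejected (no scale factor other than 0 or 1 is supported), and a
// displacement of 1 << 20 fails the isInt<20> check.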
1433
1435 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
1436 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
1437 const AttributeList &FuncAttributes) const {
1438 const int MVCFastLen = 16;
1439
1440 if (Limit != ~unsigned(0)) {
1441 // Don't expand Op into scalar loads/stores in these cases:
1442 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1443 return false; // Small memcpy: Use MVC
1444 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1445 return false; // Small memset (first byte with STC/MVI): Use MVC
1446 if (Op.isZeroMemset())
1447 return false; // Memset zero: Use XC
1448 }
1449
1450 return TargetLowering::findOptimalMemOpLowering(Context, MemOps, Limit, Op,
1451 DstAS, SrcAS, FuncAttributes);
1452}
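// Illustrative note (not part of the original source): the effect of the
// early returns above is that a memcpy of at most 16 bytes (MVCFastLen)
// whose stores may overlap is not broken up into scalar loads/stores and can
// later be emitted as a single MVC, and a memset of zero is likewise kept
// intact so that it can become an XC.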
1453
1455 LLVMContext &Context, const MemOp &Op,
1456 const AttributeList &FuncAttributes) const {
1457 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1458}
1459
1460bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1461 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1462 return false;
1463 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1464 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1465 return FromBits > ToBits;
1466}
1467
1469 if (!FromVT.isInteger() || !ToVT.isInteger())
1470 return false;
1471 unsigned FromBits = FromVT.getFixedSizeInBits();
1472 unsigned ToBits = ToVT.getFixedSizeInBits();
1473 return FromBits > ToBits;
1474}
1475
1476//===----------------------------------------------------------------------===//
1477// Inline asm support
1478//===----------------------------------------------------------------------===//
1479
1482 if (Constraint.size() == 1) {
1483 switch (Constraint[0]) {
1484 case 'a': // Address register
1485 case 'd': // Data register (equivalent to 'r')
1486 case 'f': // Floating-point register
1487 case 'h': // High-part register
1488 case 'r': // General-purpose register
1489 case 'v': // Vector register
1490 return C_RegisterClass;
1491
1492 case 'Q': // Memory with base and unsigned 12-bit displacement
1493 case 'R': // Likewise, plus an index
1494 case 'S': // Memory with base and signed 20-bit displacement
1495 case 'T': // Likewise, plus an index
1496 case 'm': // Equivalent to 'T'.
1497 return C_Memory;
1498
1499 case 'I': // Unsigned 8-bit constant
1500 case 'J': // Unsigned 12-bit constant
1501 case 'K': // Signed 16-bit constant
1502 case 'L': // Signed 20-bit displacement (on all targets we support)
1503 case 'M': // 0x7fffffff
1504 return C_Immediate;
1505
1506 default:
1507 break;
1508 }
1509 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1510 switch (Constraint[1]) {
1511 case 'Q': // Address with base and unsigned 12-bit displacement
1512 case 'R': // Likewise, plus an index
1513 case 'S': // Address with base and signed 20-bit displacement
1514 case 'T': // Likewise, plus an index
1515 return C_Address;
1516
1517 default:
1518 break;
1519 }
1520 } else if (Constraint.size() == 5 && Constraint.starts_with("{")) {
1521 if (StringRef("{@cc}").compare(Constraint) == 0)
1522 return C_Other;
1523 }
1524 return TargetLowering::getConstraintType(Constraint);
1525}
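// Illustrative sketch (not part of the original source): these constraint
// letters correspond to GCC-style inline assembly such as
//
//   int Val = 0;
//   asm("ahi %0,%1" : "+d"(Val) : "K"(42));
//
// where "d" asks for a general-purpose register and "K" accepts the signed
// 16-bit immediate 42.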
1526
1529 AsmOperandInfo &Info, const char *Constraint) const {
1531 Value *CallOperandVal = Info.CallOperandVal;
1532 // If we don't have a value, we can't do a match,
1533 // but allow it at the lowest weight.
1534 if (!CallOperandVal)
1535 return CW_Default;
1536 Type *type = CallOperandVal->getType();
1537 // Look at the constraint type.
1538 switch (*Constraint) {
1539 default:
1540 Weight = TargetLowering::getSingleConstraintMatchWeight(Info, Constraint);
1541 break;
1542
1543 case 'a': // Address register
1544 case 'd': // Data register (equivalent to 'r')
1545 case 'h': // High-part register
1546 case 'r': // General-purpose register
1547 Weight =
1548 CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1549 break;
1550
1551 case 'f': // Floating-point register
1552 if (!useSoftFloat())
1553 Weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1554 break;
1555
1556 case 'v': // Vector register
1557 if (Subtarget.hasVector())
1558 Weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1559 : CW_Default;
1560 break;
1561
1562 case 'I': // Unsigned 8-bit constant
1563 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1564 if (isUInt<8>(C->getZExtValue()))
1565 Weight = CW_Constant;
1566 break;
1567
1568 case 'J': // Unsigned 12-bit constant
1569 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1570 if (isUInt<12>(C->getZExtValue()))
1571 Weight = CW_Constant;
1572 break;
1573
1574 case 'K': // Signed 16-bit constant
1575 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1576 if (isInt<16>(C->getSExtValue()))
1577 Weight = CW_Constant;
1578 break;
1579
1580 case 'L': // Signed 20-bit displacement (on all targets we support)
1581 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1582 if (isInt<20>(C->getSExtValue()))
1583 Weight = CW_Constant;
1584 break;
1585
1586 case 'M': // 0x7fffffff
1587 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1588 if (C->getZExtValue() == 0x7fffffff)
1589 Weight = CW_Constant;
1590 break;
1591 }
1592 return Weight;
1593}
1594
1595// Parse a "{tNNN}" register constraint for which the register type "t"
1596// has already been verified. MC is the class associated with "t" and
1597// Map maps 0-based register numbers to LLVM register numbers.
1598static std::pair<unsigned, const TargetRegisterClass *>
1600 const unsigned *Map, unsigned Size) {
1601 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1602 if (isdigit(Constraint[2])) {
1603 unsigned Index;
1604 bool Failed =
1605 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1606 if (!Failed && Index < Size && Map[Index])
1607 return std::make_pair(Map[Index], RC);
1608 }
1609 return std::make_pair(0U, nullptr);
1610}
1611
1612std::pair<unsigned, const TargetRegisterClass *>
1614 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1615 if (Constraint.size() == 1) {
1616 // GCC Constraint Letters
1617 switch (Constraint[0]) {
1618 default: break;
1619 case 'd': // Data register (equivalent to 'r')
1620 case 'r': // General-purpose register
1621 if (VT.getSizeInBits() == 64)
1622 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1623 else if (VT.getSizeInBits() == 128)
1624 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1625 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1626
1627 case 'a': // Address register
1628 if (VT == MVT::i64)
1629 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1630 else if (VT == MVT::i128)
1631 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1632 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1633
1634 case 'h': // High-part register (an LLVM extension)
1635 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1636
1637 case 'f': // Floating-point register
1638 if (!useSoftFloat()) {
1639 if (VT.getSizeInBits() == 16)
1640 return std::make_pair(0U, &SystemZ::FP16BitRegClass);
1641 else if (VT.getSizeInBits() == 64)
1642 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1643 else if (VT.getSizeInBits() == 128)
1644 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1645 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1646 }
1647 break;
1648
1649 case 'v': // Vector register
1650 if (Subtarget.hasVector()) {
1651 if (VT.getSizeInBits() == 16)
1652 return std::make_pair(0U, &SystemZ::VR16BitRegClass);
1653 if (VT.getSizeInBits() == 32)
1654 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1655 if (VT.getSizeInBits() == 64)
1656 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1657 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1658 }
1659 break;
1660 }
1661 }
1662 if (Constraint.starts_with("{")) {
1663
 1664 // A clobber constraint (e.g. ~{f0}) will have MVT::Other, whose size
 1665 // cannot be queried.
1666 auto getVTSizeInBits = [&VT]() {
1667 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1668 };
1669
1670 // We need to override the default register parsing for GPRs and FPRs
1671 // because the interpretation depends on VT. The internal names of
1672 // the registers are also different from the external names
1673 // (F0D and F0S instead of F0, etc.).
1674 if (Constraint[1] == 'r') {
1675 if (getVTSizeInBits() == 32)
1676 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1678 if (getVTSizeInBits() == 128)
1679 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1681 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1683 }
1684 if (Constraint[1] == 'f') {
1685 if (useSoftFloat())
1686 return std::make_pair(
1687 0u, static_cast<const TargetRegisterClass *>(nullptr));
1688 if (getVTSizeInBits() == 16)
1689 return parseRegisterNumber(Constraint, &SystemZ::FP16BitRegClass,
1691 if (getVTSizeInBits() == 32)
1692 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1694 if (getVTSizeInBits() == 128)
1695 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1697 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1699 }
1700 if (Constraint[1] == 'v') {
1701 if (!Subtarget.hasVector())
1702 return std::make_pair(
1703 0u, static_cast<const TargetRegisterClass *>(nullptr));
1704 if (getVTSizeInBits() == 16)
1705 return parseRegisterNumber(Constraint, &SystemZ::VR16BitRegClass,
1707 if (getVTSizeInBits() == 32)
1708 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1710 if (getVTSizeInBits() == 64)
1711 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1713 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1715 }
1716 if (Constraint[1] == '@') {
1717 if (StringRef("{@cc}").compare(Constraint) == 0)
1718 return std::make_pair(SystemZ::CC, &SystemZ::CCRRegClass);
1719 }
1720 }
1721 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1722}
1723
1724// FIXME? Maybe this could be a TableGen attribute on some registers and
1725// this table could be generated automatically from RegInfo.
1728 const MachineFunction &MF) const {
1729 Register Reg =
1731 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D
1732 : SystemZ::NoRegister)
1733 .Case("r15",
1734 Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister)
1735 .Default(Register());
1736
1737 return Reg;
1738}
1739
1741 const Constant *PersonalityFn) const {
1742 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1743}
1744
1746 const Constant *PersonalityFn) const {
1747 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1748}
1749
1750// Convert condition code in CCReg to an i32 value.
1752 SDLoc DL(CCReg);
1753 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
1754 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
1755 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
1756}
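// Illustrative worked example (not part of the original source): IPM leaves
// the condition code in bits 29:28 of the 32-bit result (SystemZ::IPM_CC is
// 28), so when CC == 2 the IPM value carries 0b10 in those bits and the SRL
// by 28 produces the plain integer 2.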
1757
1758// Lower @cc targets via setcc.
1760 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
1761 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
1762 if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0)
1763 return SDValue();
1764
1765 // Check that return type is valid.
1766 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
1767 OpInfo.ConstraintVT.getSizeInBits() < 8)
1768 report_fatal_error("Glue output operand is of invalid type");
1769
1770 if (Glue.getNode()) {
1771 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
1772 Chain = Glue.getValue(1);
1773 } else
1774 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32);
1775 return getCCResult(DAG, Glue);
1776}
1777
1779 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1780 SelectionDAG &DAG) const {
1781 // Only support length 1 constraints for now.
1782 if (Constraint.size() == 1) {
1783 switch (Constraint[0]) {
1784 case 'I': // Unsigned 8-bit constant
1785 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1786 if (isUInt<8>(C->getZExtValue()))
1787 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1788 Op.getValueType()));
1789 return;
1790
1791 case 'J': // Unsigned 12-bit constant
1792 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1793 if (isUInt<12>(C->getZExtValue()))
1794 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1795 Op.getValueType()));
1796 return;
1797
1798 case 'K': // Signed 16-bit constant
1799 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1800 if (isInt<16>(C->getSExtValue()))
1801 Ops.push_back(DAG.getSignedTargetConstant(
1802 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1803 return;
1804
1805 case 'L': // Signed 20-bit displacement (on all targets we support)
1806 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1807 if (isInt<20>(C->getSExtValue()))
1808 Ops.push_back(DAG.getSignedTargetConstant(
1809 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1810 return;
1811
1812 case 'M': // 0x7fffffff
1813 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1814 if (C->getZExtValue() == 0x7fffffff)
1815 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1816 Op.getValueType()));
1817 return;
1818 }
1819 }
1821}
1822
1823//===----------------------------------------------------------------------===//
1824// Calling conventions
1825//===----------------------------------------------------------------------===//
1826
1827#include "SystemZGenCallingConv.inc"
1828
1830 CallingConv::ID) const {
1831 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1832 SystemZ::R14D, 0 };
1833 return ScratchRegs;
1834}
1835
1837 Type *ToType) const {
1838 return isTruncateFree(FromType, ToType);
1839}
1840
1842 return CI->isTailCall();
1843}
1844
1845// Value is a value that has been passed to us in the location described by VA
1846// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1847// any loads onto Chain.
1849 CCValAssign &VA, SDValue Chain,
1850 SDValue Value) {
1851 // If the argument has been promoted from a smaller type, insert an
1852 // assertion to capture this.
1853 if (VA.getLocInfo() == CCValAssign::SExt)
1855 DAG.getValueType(VA.getValVT()));
1856 else if (VA.getLocInfo() == CCValAssign::ZExt)
1858 DAG.getValueType(VA.getValVT()));
1859
1860 if (VA.isExtInLoc())
1861 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1862 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1863 // If this is a short vector argument loaded from the stack,
1864 // extend from i64 to full vector size and then bitcast.
1865 assert(VA.getLocVT() == MVT::i64);
1866 assert(VA.getValVT().isVector());
1867 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1868 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1869 } else
1870 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1871 return Value;
1872}
1873
1874// Value is a value of type VA.getValVT() that we need to copy into
1875// the location described by VA. Return a copy of Value converted to
1876// VA.getLocVT(). The caller is responsible for handling indirect values.
1878 CCValAssign &VA, SDValue Value) {
1879 switch (VA.getLocInfo()) {
1880 case CCValAssign::SExt:
1881 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1882 case CCValAssign::ZExt:
1883 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1884 case CCValAssign::AExt:
1885 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1886 case CCValAssign::BCvt: {
1887 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1888 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1889 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1890 // For an f32 vararg we need to first promote it to an f64 and then
1891 // bitcast it to an i64.
1892 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1893 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1894 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1895 ? MVT::v2i64
1896 : VA.getLocVT();
1897 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1898 // For ELF, this is a short vector argument to be stored to the stack,
1899 // bitcast to v2i64 and then extract first element.
1900 if (BitCastToType == MVT::v2i64)
1901 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1902 DAG.getConstant(0, DL, MVT::i32));
1903 return Value;
1904 }
1905 case CCValAssign::Full:
1906 return Value;
1907 default:
1908 llvm_unreachable("Unhandled getLocInfo()");
1909 }
1910}
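// Illustrative example (not part of the original source): for an f32 vararg
// assigned to a 64-bit GPR slot (LocVT == i64 with BCvt), the value is first
// widened with FP_EXTEND to f64 and then bitcast to i64, as the comment above
// describes; a short vector argument headed for the stack is instead bitcast
// to v2i64 and element 0 is extracted as the i64 to store.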
1911
1913 SDLoc DL(In);
1914 SDValue Lo, Hi;
1915 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1916 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1917 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1918 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1919 DAG.getConstant(64, DL, MVT::i32)));
1920 } else {
1921 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1922 }
1923
1924 // FIXME: If v2i64 were a legal type, we could use it instead of
1925 // Untyped here. This might enable improved folding.
1926 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1927 MVT::Untyped, Hi, Lo);
1928 return SDValue(Pair, 0);
1929}
1930
1932 SDLoc DL(In);
1933 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1934 DL, MVT::i64, In);
1935 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1936 DL, MVT::i64, In);
1937
1938 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1939 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1940 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1941 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1942 DAG.getConstant(64, DL, MVT::i32));
1943 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1944 } else {
1945 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1946 }
1947}
1948
1950 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1951 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1952 EVT ValueVT = Val.getValueType();
1953 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1954 // Inline assembly operand.
1955 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1956 return true;
1957 }
1958
1959 return false;
1960}
1961
1963 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1964 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1965 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1966 // Inline assembly operand.
1967 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1968 return DAG.getBitcast(ValueVT, Res);
1969 }
1970
1971 return SDValue();
1972}
1973
1974// The first part of a split stack argument is at index I in Args (and
1975// ArgLocs). Return the type of a part and the number of them by reference.
1976template <class ArgTy>
1978 SmallVector<CCValAssign, 16> &ArgLocs, unsigned I,
1979 MVT &PartVT, unsigned &NumParts) {
1980 if (!Args[I].Flags.isSplit())
1981 return false;
1982 assert(I < ArgLocs.size() && ArgLocs.size() == Args.size() &&
1983 "ArgLocs havoc.");
1984 PartVT = ArgLocs[I].getValVT();
1985 NumParts = 1;
1986 for (unsigned PartIdx = I + 1;; ++PartIdx) {
1987 assert(PartIdx != ArgLocs.size() && "SplitEnd not found.");
1988 assert(ArgLocs[PartIdx].getValVT() == PartVT && "Unsupported split.");
1989 ++NumParts;
1990 if (Args[PartIdx].Flags.isSplitEnd())
1991 break;
1992 }
1993 return true;
1994}
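// Illustrative example (not part of the original source): as the comment in
// the argument-lowering code below notes, an i128 argument that is not a
// legal type gets split into two i64 pieces carrying the Split/SplitEnd
// flags, so for such an argument this helper reports PartVT == MVT::i64 and
// NumParts == 2.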
1995
1997 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1998 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1999 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2001 MachineFrameInfo &MFI = MF.getFrameInfo();
2003 SystemZMachineFunctionInfo *FuncInfo =
2005 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
2006 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2007
2008 // Assign locations to all of the incoming arguments.
2010 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2011 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
2012 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
2013
2014 unsigned NumFixedGPRs = 0;
2015 unsigned NumFixedFPRs = 0;
2016 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2017 SDValue ArgValue;
2018 CCValAssign &VA = ArgLocs[I];
2019 EVT LocVT = VA.getLocVT();
2020 if (VA.isRegLoc()) {
2021 // Arguments passed in registers
2022 const TargetRegisterClass *RC;
2023 switch (LocVT.getSimpleVT().SimpleTy) {
2024 default:
2025 // Integers smaller than i64 should be promoted to i64.
2026 llvm_unreachable("Unexpected argument type");
2027 case MVT::i32:
2028 NumFixedGPRs += 1;
2029 RC = &SystemZ::GR32BitRegClass;
2030 break;
2031 case MVT::i64:
2032 NumFixedGPRs += 1;
2033 RC = &SystemZ::GR64BitRegClass;
2034 break;
2035 case MVT::f16:
2036 NumFixedFPRs += 1;
2037 RC = &SystemZ::FP16BitRegClass;
2038 break;
2039 case MVT::f32:
2040 NumFixedFPRs += 1;
2041 RC = &SystemZ::FP32BitRegClass;
2042 break;
2043 case MVT::f64:
2044 NumFixedFPRs += 1;
2045 RC = &SystemZ::FP64BitRegClass;
2046 break;
2047 case MVT::f128:
2048 NumFixedFPRs += 2;
2049 RC = &SystemZ::FP128BitRegClass;
2050 break;
2051 case MVT::v16i8:
2052 case MVT::v8i16:
2053 case MVT::v4i32:
2054 case MVT::v2i64:
2055 case MVT::v4f32:
2056 case MVT::v2f64:
2057 RC = &SystemZ::VR128BitRegClass;
2058 break;
2059 }
2060
2061 Register VReg = MRI.createVirtualRegister(RC);
2062 MRI.addLiveIn(VA.getLocReg(), VReg);
2063 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
2064 } else {
2065 assert(VA.isMemLoc() && "Argument not register or memory");
2066
2067 // Create the frame index object for this incoming parameter.
2068 // FIXME: Pre-include call frame size in the offset, should not
2069 // need to manually add it here.
2070 int64_t ArgSPOffset = VA.getLocMemOffset();
2071 if (Subtarget.isTargetXPLINK64()) {
2072 auto &XPRegs =
2073 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
2074 ArgSPOffset += XPRegs.getCallFrameSize();
2075 }
2076 int FI =
2077 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
2078
2079 // Create the SelectionDAG nodes corresponding to a load
2080 // from this parameter. Unpromoted ints and floats are
2081 // passed as right-justified 8-byte values.
2082 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2083 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32 ||
2084 VA.getLocVT() == MVT::f16) {
2085 unsigned SlotOffs = VA.getLocVT() == MVT::f16 ? 6 : 4;
2086 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
2087 DAG.getIntPtrConstant(SlotOffs, DL));
2088 }
2089       ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
2090                              MachinePointerInfo::getFixedStack(MF, FI));
2091 }
2092
2093 // Convert the value of the argument register into the value that's
2094 // being passed.
2095 if (VA.getLocInfo() == CCValAssign::Indirect) {
2096       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2097                                    MachinePointerInfo()));
2098 // If the original argument was split (e.g. i128), we need
2099 // to load all parts of it here (using the same address).
2100 MVT PartVT;
2101 unsigned NumParts;
2102 if (analyzeArgSplit(Ins, ArgLocs, I, PartVT, NumParts)) {
2103 for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
2104 ++I;
2105 CCValAssign &PartVA = ArgLocs[I];
2106 unsigned PartOffset = Ins[I].PartOffset;
2107 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
2108 DAG.getIntPtrConstant(PartOffset, DL));
2109           InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2110                                        MachinePointerInfo()));
2111 assert(PartOffset && "Offset should be non-zero.");
2112 }
2113 }
2114 } else
2115 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
2116 }
2117
2118 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
2119 // Save the number of non-varargs registers for later use by va_start, etc.
2120 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2121 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2122
2123 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2124 Subtarget.getSpecialRegisters());
2125
2126 // Likewise the address (in the form of a frame index) of where the
2127 // first stack vararg would be. The 1-byte size here is arbitrary.
2128 // FIXME: Pre-include call frame size in the offset, should not
2129 // need to manually add it here.
2130 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
2131 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
2132 FuncInfo->setVarArgsFrameIndex(FI);
2133 }
2134
2135 if (IsVarArg && Subtarget.isTargetELF()) {
2136 // Save the number of non-varargs registers for later use by va_start, etc.
2137 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2138 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2139
2140 // Likewise the address (in the form of a frame index) of where the
2141 // first stack vararg would be. The 1-byte size here is arbitrary.
2142 int64_t VarArgsOffset = CCInfo.getStackSize();
2143 FuncInfo->setVarArgsFrameIndex(
2144 MFI.CreateFixedObject(1, VarArgsOffset, true));
2145
2146 // ...and a similar frame index for the caller-allocated save area
2147 // that will be used to store the incoming registers.
2148 int64_t RegSaveOffset =
2149 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
2150 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
2151 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
2152
2153 // Store the FPR varargs in the reserved frame slots. (We store the
2154 // GPRs as part of the prologue.)
2155 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
2156       SDValue MemOps[SystemZ::ELFNumArgFPRs];
2157       for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
2158         unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
2159         int FI =
2160             MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
2161         SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2162         Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
2163                                      &SystemZ::FP64BitRegClass);
2164         SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
2165         MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
2166                                  MachinePointerInfo::getFixedStack(MF, FI));
2167       }
2168 // Join the stores, which are independent of one another.
2169 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2170 ArrayRef(&MemOps[NumFixedFPRs],
2171 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
2172 }
2173 }
2174
2175 if (Subtarget.isTargetXPLINK64()) {
2176     // Create virtual register for handling incoming "ADA" special register (R5)
2177 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
2178 Register ADAvReg = MRI.createVirtualRegister(RC);
2179 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2180 Subtarget.getSpecialRegisters());
2181 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
2182 FuncInfo->setADAVirtualRegister(ADAvReg);
2183 }
2184 return Chain;
2185}
2186
2187 static bool canUseSiblingCall(const CCState &ArgCCInfo,
2188                               SmallVectorImpl<CCValAssign> &ArgLocs,
2189                               SmallVectorImpl<ISD::OutputArg> &Outs) {
2190 // Punt if there are any indirect or stack arguments, or if the call
2191 // needs the callee-saved argument register R6, or if the call uses
2192 // the callee-saved register arguments SwiftSelf and SwiftError.
2193 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2194     CCValAssign &VA = ArgLocs[I];
2195     if (VA.getLocInfo() == CCValAssign::Indirect)
2196 return false;
2197 if (!VA.isRegLoc())
2198 return false;
2199 Register Reg = VA.getLocReg();
2200 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
2201 return false;
2202 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
2203 return false;
2204 }
2205 return true;
2206}
2207
2208 static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL,
2209                            unsigned Offset, bool LoadAdr = false) {
2210   MachineFunction &MF = DAG.getMachineFunction();
2211   SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
2212   Register ADAvReg = MFI->getADAVirtualRegister();
2213   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2214
2215 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
2216 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
2217
2218 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
2219 if (!LoadAdr)
2220 Result = DAG.getLoad(
2221         PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
2222         MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant);
2223
2224 return Result;
2225}
2226
2227// ADA access using Global value
2228// Note: for functions, address of descriptor is returned
2229 static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL,
2230                            EVT PtrVT) {
2231 unsigned ADAtype;
2232 bool LoadAddr = false;
2233 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
2234 bool IsFunction =
2235 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
2236 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
2237
2238 if (IsFunction) {
2239 if (IsInternal) {
2240       ADAtype = SystemZII::MO_ADA_DIRECT_FUNC_DESC;
2241       LoadAddr = true;
2242     } else
2243       ADAtype = SystemZII::MO_ADA_INDIRECT_FUNC_DESC;
2244   } else {
2245     ADAtype = SystemZII::MO_ADA_DATA_SYMBOL_ADDR;
2246   }
2247 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
2248
2249 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
2250}
2251
2252static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
2253 SDLoc &DL, SDValue &Chain) {
2254 unsigned ADADelta = 0; // ADA offset in desc.
2255 unsigned EPADelta = 8; // EPA offset in desc.
2256   MachineFunction &MF = DAG.getMachineFunction();
2257   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2258
2259 // XPLink calling convention.
2260 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2261 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
2262 G->getGlobal()->hasPrivateLinkage());
2263 if (IsInternal) {
2264       SystemZMachineFunctionInfo *MFI =
2265           MF.getInfo<SystemZMachineFunctionInfo>();
2266       Register ADAvReg = MFI->getADAVirtualRegister();
2267 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
2268 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2269 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2270 return true;
2271 } else {
2272       SDValue GA = DAG.getTargetGlobalAddress(
2273           G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2274 ADA = getADAEntry(DAG, GA, DL, ADADelta);
2275 Callee = getADAEntry(DAG, GA, DL, EPADelta);
2276 }
2277 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2278     SDValue ES = DAG.getTargetExternalSymbol(
2279         E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2280 ADA = getADAEntry(DAG, ES, DL, ADADelta);
2281 Callee = getADAEntry(DAG, ES, DL, EPADelta);
2282 } else {
2283 // Function pointer case
2284 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2285 DAG.getConstant(ADADelta, DL, PtrVT));
2286     ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
2287                       MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2288 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2289 DAG.getConstant(EPADelta, DL, PtrVT));
2290     Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
2291                          MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2292 }
2293 return false;
2294}
2295
2296 SDValue
2297 SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
2298 SmallVectorImpl<SDValue> &InVals) const {
2299 SelectionDAG &DAG = CLI.DAG;
2300 SDLoc &DL = CLI.DL;
2301   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2302   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2303   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2304 SDValue Chain = CLI.Chain;
2305 SDValue Callee = CLI.Callee;
2306 bool &IsTailCall = CLI.IsTailCall;
2307 CallingConv::ID CallConv = CLI.CallConv;
2308 bool IsVarArg = CLI.IsVarArg;
2309   MachineFunction &MF = DAG.getMachineFunction();
2310   EVT PtrVT = getPointerTy(MF.getDataLayout());
2311 LLVMContext &Ctx = *DAG.getContext();
2312 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
2313
2314 // FIXME: z/OS support to be added in later.
2315 if (Subtarget.isTargetXPLINK64())
2316 IsTailCall = false;
2317
2318 // Integer args <=32 bits should have an extension attribute.
2319 verifyNarrowIntegerArgs_Call(Outs, &MF.getFunction(), Callee);
2320
2321 // Analyze the operands of the call, assigning locations to each operand.
2322   SmallVector<CCValAssign, 16> ArgLocs;
2323   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
2324 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
2325
2326 // We don't support GuaranteedTailCallOpt, only automatically-detected
2327 // sibling calls.
2328 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
2329 IsTailCall = false;
2330
2331 // Get a count of how many bytes are to be pushed on the stack.
2332 unsigned NumBytes = ArgCCInfo.getStackSize();
2333
2334 // Mark the start of the call.
2335 if (!IsTailCall)
2336 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
2337
2338 // Copy argument values to their designated locations.
2339   SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
2340   SmallVector<SDValue, 8> MemOpChains;
2341 SDValue StackPtr;
2342 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2343 CCValAssign &VA = ArgLocs[I];
2344 SDValue ArgValue = OutVals[I];
2345
2346 if (VA.getLocInfo() == CCValAssign::Indirect) {
2347 // Store the argument in a stack slot and pass its address.
2348 EVT SlotVT;
2349 MVT PartVT;
2350 unsigned NumParts = 1;
2351 if (analyzeArgSplit(Outs, ArgLocs, I, PartVT, NumParts))
2352 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * NumParts);
2353 else
2354 SlotVT = Outs[I].VT;
2355 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
2356 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2357 MemOpChains.push_back(
2358           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
2359                        MachinePointerInfo::getFixedStack(MF, FI)));
2360 // If the original argument was split (e.g. i128), we need
2361 // to store all parts of it here (and pass just one address).
2362 assert(Outs[I].PartOffset == 0);
2363 for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
2364 ++I;
2365 SDValue PartValue = OutVals[I];
2366 unsigned PartOffset = Outs[I].PartOffset;
2367 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
2368 DAG.getIntPtrConstant(PartOffset, DL));
2369 MemOpChains.push_back(
2370             DAG.getStore(Chain, DL, PartValue, Address,
2371                          MachinePointerInfo::getFixedStack(MF, FI)));
2372 assert(PartOffset && "Offset should be non-zero.");
2373 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
2374 SlotVT.getStoreSize()) && "Not enough space for argument part!");
2375 }
2376 ArgValue = SpillSlot;
2377 } else
2378 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
2379
2380 if (VA.isRegLoc()) {
2381 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
2382 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
2383 // and low values.
2384 if (VA.getLocVT() == MVT::i128)
2385 ArgValue = lowerI128ToGR128(DAG, ArgValue);
2386 // Queue up the argument copies and emit them at the end.
2387 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2388 } else {
2389 assert(VA.isMemLoc() && "Argument not register or memory");
2390
2391 // Work out the address of the stack slot. Unpromoted ints and
2392 // floats are passed as right-justified 8-byte values.
2393 if (!StackPtr.getNode())
2394 StackPtr = DAG.getCopyFromReg(Chain, DL,
2395 Regs->getStackPointerRegister(), PtrVT);
2396 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
2397 VA.getLocMemOffset();
2398 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
2399 Offset += 4;
2400 else if (VA.getLocVT() == MVT::f16)
2401 Offset += 6;
2402       SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2403                                     DAG.getIntPtrConstant(Offset, DL));
2404
2405 // Emit the store.
2406 MemOpChains.push_back(
2407 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2408
2409 // Although long doubles or vectors are passed through the stack when
2410 // they are vararg (non-fixed arguments), if a long double or vector
2411 // occupies the third and fourth slot of the argument list GPR3 should
2412 // still shadow the third slot of the argument list.
2413 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2414 SDValue ShadowArgValue =
2415 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2416 DAG.getIntPtrConstant(1, DL));
2417 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2418 }
2419 }
2420 }
2421
2422 // Join the stores, which are independent of one another.
2423 if (!MemOpChains.empty())
2424 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2425
2426 // Accept direct calls by converting symbolic call addresses to the
2427 // associated Target* opcodes. Force %r1 to be used for indirect
2428 // tail calls.
2429 SDValue Glue;
2430
2431 if (Subtarget.isTargetXPLINK64()) {
2432 SDValue ADA;
2433 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2434 if (!IsBRASL) {
2435 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2436 ->getAddressOfCalleeRegister();
2437 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2438 Glue = Chain.getValue(1);
2439 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2440 }
2441 RegsToPass.push_back(std::make_pair(
2442 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2443 } else {
2444 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2445 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2446 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2447 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2448 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2449 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2450 } else if (IsTailCall) {
2451 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2452 Glue = Chain.getValue(1);
2453 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2454 }
2455 }
2456
2457 // Build a sequence of copy-to-reg nodes, chained and glued together.
2458 for (const auto &[Reg, N] : RegsToPass) {
2459 Chain = DAG.getCopyToReg(Chain, DL, Reg, N, Glue);
2460 Glue = Chain.getValue(1);
2461 }
2462
2463 // The first call operand is the chain and the second is the target address.
2464   SmallVector<SDValue, 8> Ops;
2465   Ops.push_back(Chain);
2466 Ops.push_back(Callee);
2467
2468 // Add argument registers to the end of the list so that they are
2469 // known live into the call.
2470 for (const auto &[Reg, N] : RegsToPass)
2471 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
2472
2473 // Add a register mask operand representing the call-preserved registers.
2474 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2475 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2476 assert(Mask && "Missing call preserved mask for calling convention");
2477 Ops.push_back(DAG.getRegisterMask(Mask));
2478
2479 // Glue the call to the argument copies, if any.
2480 if (Glue.getNode())
2481 Ops.push_back(Glue);
2482
2483 // Emit the call.
2484 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2485 if (IsTailCall) {
2486 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2487 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2488 return Ret;
2489 }
2490 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2491 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2492 Glue = Chain.getValue(1);
2493
2494 // Mark the end of the call, which is glued to the call itself.
2495 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2496 Glue = Chain.getValue(1);
2497
2498 // Assign locations to each value returned by this call.
2499   SmallVector<CCValAssign, 8> RetLocs;
2500   CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2501 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2502
2503 // Copy all of the result registers out of their specified physreg.
2504 for (CCValAssign &VA : RetLocs) {
2505 // Copy the value out, gluing the copy to the end of the call sequence.
2506 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2507 VA.getLocVT(), Glue);
2508 Chain = RetValue.getValue(1);
2509 Glue = RetValue.getValue(2);
2510
2511 // Convert the value of the return register into the value that's
2512 // being returned.
2513 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2514 }
2515
2516 return Chain;
2517}
2518
2519// Generate a call taking the given operands as arguments and returning a
2520// result of type RetVT.
2521 std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
2522     SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2523 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2524 bool DoesNotReturn, bool IsReturnValueUsed) const {
2525   TargetLowering::ArgListTy Args;
2526   Args.reserve(Ops.size());
2527
2528 for (SDValue Op : Ops) {
2529     ArgListEntry Entry(
2530         Op, Op.getValueType().getTypeForEVT(*DAG.getContext()));
2531 Entry.IsSExt = shouldSignExtendTypeInLibCall(Entry.Ty, IsSigned);
2532 Entry.IsZExt = !Entry.IsSExt;
2533 Args.push_back(Entry);
2534 }
2535
2536 SDValue Callee =
2537 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2538
2539 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2540   TargetLowering::CallLoweringInfo CLI(DAG);
2541   bool SignExtend = shouldSignExtendTypeInLibCall(RetTy, IsSigned);
2542 CLI.setDebugLoc(DL)
2543 .setChain(Chain)
2544 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2545 .setNoReturn(DoesNotReturn)
2546 .setDiscardResult(!IsReturnValueUsed)
2547 .setSExtResult(SignExtend)
2548 .setZExtResult(!SignExtend);
2549 return LowerCallTo(CLI);
2550}
2551
2552 bool SystemZTargetLowering::CanLowerReturn(
2553     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2554 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
2555 const Type *RetTy) const {
2556 // Special case that we cannot easily detect in RetCC_SystemZ since
2557 // i128 may not be a legal type.
2558 for (auto &Out : Outs)
2559 if (Out.ArgVT.isScalarInteger() && Out.ArgVT.getSizeInBits() > 64)
2560 return false;
2561
2562   SmallVector<CCValAssign, 16> RetLocs;
2563   CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Context);
2564 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2565}
2566
2567 SDValue
2568 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2569 bool IsVarArg,
2570                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
2571                                    const SmallVectorImpl<SDValue> &OutVals,
2572 const SDLoc &DL, SelectionDAG &DAG) const {
2573   MachineFunction &MF = DAG.getMachineFunction();
2574
2575 // Integer args <=32 bits should have an extension attribute.
2576 verifyNarrowIntegerArgs_Ret(Outs, &MF.getFunction());
2577
2578 // Assign locations to each returned value.
2579   SmallVector<CCValAssign, 16> RetLocs;
2580   CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2581 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2582
2583 // Quick exit for void returns
2584 if (RetLocs.empty())
2585 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2586
2587 if (CallConv == CallingConv::GHC)
2588 report_fatal_error("GHC functions return void only");
2589
2590 // Copy the result values into the output registers.
2591 SDValue Glue;
2592   SmallVector<SDValue, 4> RetOps;
2593   RetOps.push_back(Chain);
2594 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2595 CCValAssign &VA = RetLocs[I];
2596 SDValue RetValue = OutVals[I];
2597
2598 // Make the return register live on exit.
2599 assert(VA.isRegLoc() && "Can only return in registers!");
2600
2601 // Promote the value as required.
2602 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2603
2604 // Chain and glue the copies together.
2605 Register Reg = VA.getLocReg();
2606 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2607 Glue = Chain.getValue(1);
2608 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2609 }
2610
2611 // Update chain and glue.
2612 RetOps[0] = Chain;
2613 if (Glue.getNode())
2614 RetOps.push_back(Glue);
2615
2616 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2617}
2618
2619// Return true if Op is an intrinsic node with chain that returns the CC value
2620// as its only (other) argument. Provide the associated SystemZISD opcode and
2621// the mask of valid CC values if so.
2622static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2623 unsigned &CCValid) {
2624 unsigned Id = Op.getConstantOperandVal(1);
2625 switch (Id) {
2626 case Intrinsic::s390_tbegin:
2627 Opcode = SystemZISD::TBEGIN;
2628 CCValid = SystemZ::CCMASK_TBEGIN;
2629 return true;
2630
2631 case Intrinsic::s390_tbegin_nofloat:
2632 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2633 CCValid = SystemZ::CCMASK_TBEGIN;
2634 return true;
2635
2636 case Intrinsic::s390_tend:
2637 Opcode = SystemZISD::TEND;
2638 CCValid = SystemZ::CCMASK_TEND;
2639 return true;
2640
2641 default:
2642 return false;
2643 }
2644}
2645
2646// Return true if Op is an intrinsic node without chain that returns the
2647// CC value as its final argument. Provide the associated SystemZISD
2648// opcode and the mask of valid CC values if so.
2649static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2650 unsigned Id = Op.getConstantOperandVal(0);
2651 switch (Id) {
2652 case Intrinsic::s390_vpkshs:
2653 case Intrinsic::s390_vpksfs:
2654 case Intrinsic::s390_vpksgs:
2655 Opcode = SystemZISD::PACKS_CC;
2656 CCValid = SystemZ::CCMASK_VCMP;
2657 return true;
2658
2659 case Intrinsic::s390_vpklshs:
2660 case Intrinsic::s390_vpklsfs:
2661 case Intrinsic::s390_vpklsgs:
2662 Opcode = SystemZISD::PACKLS_CC;
2663 CCValid = SystemZ::CCMASK_VCMP;
2664 return true;
2665
2666 case Intrinsic::s390_vceqbs:
2667 case Intrinsic::s390_vceqhs:
2668 case Intrinsic::s390_vceqfs:
2669 case Intrinsic::s390_vceqgs:
2670 case Intrinsic::s390_vceqqs:
2671 Opcode = SystemZISD::VICMPES;
2672 CCValid = SystemZ::CCMASK_VCMP;
2673 return true;
2674
2675 case Intrinsic::s390_vchbs:
2676 case Intrinsic::s390_vchhs:
2677 case Intrinsic::s390_vchfs:
2678 case Intrinsic::s390_vchgs:
2679 case Intrinsic::s390_vchqs:
2680 Opcode = SystemZISD::VICMPHS;
2681 CCValid = SystemZ::CCMASK_VCMP;
2682 return true;
2683
2684 case Intrinsic::s390_vchlbs:
2685 case Intrinsic::s390_vchlhs:
2686 case Intrinsic::s390_vchlfs:
2687 case Intrinsic::s390_vchlgs:
2688 case Intrinsic::s390_vchlqs:
2689 Opcode = SystemZISD::VICMPHLS;
2690 CCValid = SystemZ::CCMASK_VCMP;
2691 return true;
2692
2693 case Intrinsic::s390_vtm:
2694 Opcode = SystemZISD::VTM;
2695 CCValid = SystemZ::CCMASK_VCMP;
2696 return true;
2697
2698 case Intrinsic::s390_vfaebs:
2699 case Intrinsic::s390_vfaehs:
2700 case Intrinsic::s390_vfaefs:
2701 Opcode = SystemZISD::VFAE_CC;
2702 CCValid = SystemZ::CCMASK_ANY;
2703 return true;
2704
2705 case Intrinsic::s390_vfaezbs:
2706 case Intrinsic::s390_vfaezhs:
2707 case Intrinsic::s390_vfaezfs:
2708 Opcode = SystemZISD::VFAEZ_CC;
2709 CCValid = SystemZ::CCMASK_ANY;
2710 return true;
2711
2712 case Intrinsic::s390_vfeebs:
2713 case Intrinsic::s390_vfeehs:
2714 case Intrinsic::s390_vfeefs:
2715 Opcode = SystemZISD::VFEE_CC;
2716 CCValid = SystemZ::CCMASK_ANY;
2717 return true;
2718
2719 case Intrinsic::s390_vfeezbs:
2720 case Intrinsic::s390_vfeezhs:
2721 case Intrinsic::s390_vfeezfs:
2722 Opcode = SystemZISD::VFEEZ_CC;
2723 CCValid = SystemZ::CCMASK_ANY;
2724 return true;
2725
2726 case Intrinsic::s390_vfenebs:
2727 case Intrinsic::s390_vfenehs:
2728 case Intrinsic::s390_vfenefs:
2729 Opcode = SystemZISD::VFENE_CC;
2730 CCValid = SystemZ::CCMASK_ANY;
2731 return true;
2732
2733 case Intrinsic::s390_vfenezbs:
2734 case Intrinsic::s390_vfenezhs:
2735 case Intrinsic::s390_vfenezfs:
2736 Opcode = SystemZISD::VFENEZ_CC;
2737 CCValid = SystemZ::CCMASK_ANY;
2738 return true;
2739
2740 case Intrinsic::s390_vistrbs:
2741 case Intrinsic::s390_vistrhs:
2742 case Intrinsic::s390_vistrfs:
2743 Opcode = SystemZISD::VISTR_CC;
2744     CCValid = SystemZ::CCMASK_ANY;
2745     return true;
2746
2747 case Intrinsic::s390_vstrcbs:
2748 case Intrinsic::s390_vstrchs:
2749 case Intrinsic::s390_vstrcfs:
2750 Opcode = SystemZISD::VSTRC_CC;
2751 CCValid = SystemZ::CCMASK_ANY;
2752 return true;
2753
2754 case Intrinsic::s390_vstrczbs:
2755 case Intrinsic::s390_vstrczhs:
2756 case Intrinsic::s390_vstrczfs:
2757 Opcode = SystemZISD::VSTRCZ_CC;
2758 CCValid = SystemZ::CCMASK_ANY;
2759 return true;
2760
2761 case Intrinsic::s390_vstrsb:
2762 case Intrinsic::s390_vstrsh:
2763 case Intrinsic::s390_vstrsf:
2764 Opcode = SystemZISD::VSTRS_CC;
2765 CCValid = SystemZ::CCMASK_ANY;
2766 return true;
2767
2768 case Intrinsic::s390_vstrszb:
2769 case Intrinsic::s390_vstrszh:
2770 case Intrinsic::s390_vstrszf:
2771 Opcode = SystemZISD::VSTRSZ_CC;
2772 CCValid = SystemZ::CCMASK_ANY;
2773 return true;
2774
2775 case Intrinsic::s390_vfcedbs:
2776 case Intrinsic::s390_vfcesbs:
2777 Opcode = SystemZISD::VFCMPES;
2778 CCValid = SystemZ::CCMASK_VCMP;
2779 return true;
2780
2781 case Intrinsic::s390_vfchdbs:
2782 case Intrinsic::s390_vfchsbs:
2783 Opcode = SystemZISD::VFCMPHS;
2784 CCValid = SystemZ::CCMASK_VCMP;
2785 return true;
2786
2787 case Intrinsic::s390_vfchedbs:
2788 case Intrinsic::s390_vfchesbs:
2789 Opcode = SystemZISD::VFCMPHES;
2790 CCValid = SystemZ::CCMASK_VCMP;
2791 return true;
2792
2793 case Intrinsic::s390_vftcidb:
2794 case Intrinsic::s390_vftcisb:
2795 Opcode = SystemZISD::VFTCI;
2796 CCValid = SystemZ::CCMASK_VCMP;
2797 return true;
2798
2799 case Intrinsic::s390_tdc:
2800 Opcode = SystemZISD::TDC;
2801 CCValid = SystemZ::CCMASK_TDC;
2802 return true;
2803
2804 default:
2805 return false;
2806 }
2807}
2808
2809// Emit an intrinsic with chain and an explicit CC register result.
2810 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2811                                            unsigned Opcode) {
2812 // Copy all operands except the intrinsic ID.
2813 unsigned NumOps = Op.getNumOperands();
2814   SmallVector<SDValue, 6> Ops;
2815   Ops.reserve(NumOps - 1);
2816 Ops.push_back(Op.getOperand(0));
2817 for (unsigned I = 2; I < NumOps; ++I)
2818 Ops.push_back(Op.getOperand(I));
2819
2820 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2821 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2822 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2823 SDValue OldChain = SDValue(Op.getNode(), 1);
2824 SDValue NewChain = SDValue(Intr.getNode(), 1);
2825 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2826 return Intr.getNode();
2827}
2828
2829// Emit an intrinsic with an explicit CC register result.
2830 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2831                                    unsigned Opcode) {
2832 // Copy all operands except the intrinsic ID.
2833 SDLoc DL(Op);
2834 unsigned NumOps = Op.getNumOperands();
2835   SmallVector<SDValue, 6> Ops;
2836   Ops.reserve(NumOps - 1);
2837 for (unsigned I = 1; I < NumOps; ++I) {
2838 SDValue CurrOper = Op.getOperand(I);
2839 if (CurrOper.getValueType() == MVT::f16) {
2840 assert((Op.getConstantOperandVal(0) == Intrinsic::s390_tdc && I == 1) &&
2841 "Unhandled intrinsic with f16 operand.");
2842 CurrOper = DAG.getFPExtendOrRound(CurrOper, DL, MVT::f32);
2843 }
2844 Ops.push_back(CurrOper);
2845 }
2846
2847 SDValue Intr = DAG.getNode(Opcode, DL, Op->getVTList(), Ops);
2848 return Intr.getNode();
2849}
2850
2851// CC is a comparison that will be implemented using an integer or
2852// floating-point comparison. Return the condition code mask for
2853// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2854// unsigned comparisons and clear for signed ones. In the floating-point
2855// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2856 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2857 #define CONV(X) \
2858 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2859 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2860 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2861
2862 switch (CC) {
2863 default:
2864 llvm_unreachable("Invalid integer condition!");
2865
2866 CONV(EQ);
2867 CONV(NE);
2868 CONV(GT);
2869 CONV(GE);
2870 CONV(LT);
2871 CONV(LE);
2872
2873   case ISD::SETO:  return SystemZ::CCMASK_CMP_O;
2874   case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2875 }
2876#undef CONV
2877}
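// For example, CCMaskForCondCode(ISD::SETLT) yields CCMASK_CMP_LT, while
// CCMaskForCondCode(ISD::SETULT) yields CCMASK_CMP_UO | CCMASK_CMP_LT; the
// UO bit is cleared again for integer comparisons (see getCmp below) and
// keeps its unordered meaning for floating-point comparisons.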
2878
2879// If C can be converted to a comparison against zero, adjust the operands
2880// as necessary.
2881static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2882 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2883 return;
2884
2885 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2886 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2887 return;
2888
2889 int64_t Value = ConstOp1->getSExtValue();
2890 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2891 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2892 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2893 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2894 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2895 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2896 }
2897}
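// For example, a signed test "x > -1" (CCMASK_CMP_GT against constant -1) is
// rewritten as "x >= 0": XOR-ing in CCMASK_CMP_EQ turns GT into GE, and the
// constant operand becomes zero, which later lowering can test more cheaply.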
2898
2899// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2900// adjust the operands as necessary.
2901static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2902 Comparison &C) {
2903   // For us to make any changes, it must be a comparison between a single-use
2904 // load and a constant.
2905 if (!C.Op0.hasOneUse() ||
2906 C.Op0.getOpcode() != ISD::LOAD ||
2907 C.Op1.getOpcode() != ISD::Constant)
2908 return;
2909
2910 // We must have an 8- or 16-bit load.
2911 auto *Load = cast<LoadSDNode>(C.Op0);
2912 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2913 if ((NumBits != 8 && NumBits != 16) ||
2914 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2915 return;
2916
2917 // The load must be an extending one and the constant must be within the
2918 // range of the unextended value.
2919 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2920 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2921 return;
2922 uint64_t Value = ConstOp1->getZExtValue();
2923 uint64_t Mask = (1 << NumBits) - 1;
2924 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2925 // Make sure that ConstOp1 is in range of C.Op0.
2926 int64_t SignedValue = ConstOp1->getSExtValue();
2927 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2928 return;
2929 if (C.ICmpType != SystemZICMP::SignedOnly) {
2930 // Unsigned comparison between two sign-extended values is equivalent
2931 // to unsigned comparison between two zero-extended values.
2932 Value &= Mask;
2933 } else if (NumBits == 8) {
2934 // Try to treat the comparison as unsigned, so that we can use CLI.
2935 // Adjust CCMask and Value as necessary.
2936 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2937 // Test whether the high bit of the byte is set.
2938 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2939 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2940 // Test whether the high bit of the byte is clear.
2941 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2942 else
2943 // No instruction exists for this combination.
2944 return;
2945 C.ICmpType = SystemZICMP::UnsignedOnly;
2946 }
2947 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2948 if (Value > Mask)
2949 return;
2950 // If the constant is in range, we can use any comparison.
2951 C.ICmpType = SystemZICMP::Any;
2952 } else
2953 return;
2954
2955 // Make sure that the first operand is an i32 of the right extension type.
2956   ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2957                               ISD::SEXTLOAD :
2958                               ISD::ZEXTLOAD);
2959 if (C.Op0.getValueType() != MVT::i32 ||
2960 Load->getExtensionType() != ExtType) {
2961 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2962 Load->getBasePtr(), Load->getPointerInfo(),
2963 Load->getMemoryVT(), Load->getAlign(),
2964 Load->getMemOperand()->getFlags());
2965 // Update the chain uses.
2966 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2967 }
2968
2969 // Make sure that the second operand is an i32 with the right value.
2970 if (C.Op1.getValueType() != MVT::i32 ||
2971 Value != ConstOp1->getZExtValue())
2972 C.Op1 = DAG.getConstant((uint32_t)Value, DL, MVT::i32);
2973}
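// For example, an equality comparison of an 8-bit zero-extending load against
// the constant 200 keeps the unextended value (200 fits in the 8-bit range),
// which allows memory-immediate instructions such as CLI(Y) to be used, as
// described in the comment above this function.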
2974
2975// Return true if Op is either an unextended load, or a load suitable
2976// for integer register-memory comparisons of type ICmpType.
2977static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2978 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2979 if (Load) {
2980 // There are no instructions to compare a register with a memory byte.
2981 if (Load->getMemoryVT() == MVT::i8)
2982 return false;
2983 // Otherwise decide on extension type.
2984 switch (Load->getExtensionType()) {
2985 case ISD::NON_EXTLOAD:
2986 return true;
2987 case ISD::SEXTLOAD:
2988 return ICmpType != SystemZICMP::UnsignedOnly;
2989 case ISD::ZEXTLOAD:
2990 return ICmpType != SystemZICMP::SignedOnly;
2991 default:
2992 break;
2993 }
2994 }
2995 return false;
2996}
2997
2998// Return true if it is better to swap the operands of C.
2999static bool shouldSwapCmpOperands(const Comparison &C) {
3000 // Leave i128 and f128 comparisons alone, since they have no memory forms.
3001 if (C.Op0.getValueType() == MVT::i128)
3002 return false;
3003 if (C.Op0.getValueType() == MVT::f128)
3004 return false;
3005
3006 // Always keep a floating-point constant second, since comparisons with
3007 // zero can use LOAD TEST and comparisons with other constants make a
3008 // natural memory operand.
3009 if (isa<ConstantFPSDNode>(C.Op1))
3010 return false;
3011
3012 // Never swap comparisons with zero since there are many ways to optimize
3013 // those later.
3014 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3015 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
3016 return false;
3017
3018 // Also keep natural memory operands second if the loaded value is
3019 // only used here. Several comparisons have memory forms.
3020 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
3021 return false;
3022
3023 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
3024 // In that case we generally prefer the memory to be second.
3025 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
3026 // The only exceptions are when the second operand is a constant and
3027 // we can use things like CHHSI.
3028 if (!ConstOp1)
3029 return true;
3030 // The unsigned memory-immediate instructions can handle 16-bit
3031 // unsigned integers.
3032 if (C.ICmpType != SystemZICMP::SignedOnly &&
3033 isUInt<16>(ConstOp1->getZExtValue()))
3034 return false;
3035 // The signed memory-immediate instructions can handle 16-bit
3036 // signed integers.
3037 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
3038 isInt<16>(ConstOp1->getSExtValue()))
3039 return false;
3040 return true;
3041 }
3042
3043 // Try to promote the use of CGFR and CLGFR.
3044 unsigned Opcode0 = C.Op0.getOpcode();
3045 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
3046 return true;
3047 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
3048 return true;
3049 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
3050 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
3051 C.Op0.getConstantOperandVal(1) == 0xffffffff)
3052 return true;
3053
3054 return false;
3055}
3056
3057// Check whether C tests for equality between X and Y and whether X - Y
3058// or Y - X is also computed. In that case it's better to compare the
3059// result of the subtraction against zero.
3060 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
3061                                  Comparison &C) {
3062 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3063 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3064 for (SDNode *N : C.Op0->users()) {
3065 if (N->getOpcode() == ISD::SUB &&
3066 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
3067 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
3068 // Disable the nsw and nuw flags: the backend needs to handle
3069 // overflow as well during comparison elimination.
3070 N->dropFlags(SDNodeFlags::NoWrap);
3071 C.Op0 = SDValue(N, 0);
3072 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
3073 return;
3074 }
3075 }
3076 }
3077}
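// For example, in code like "d = a - b; if (a == b) ...", the equality test is
// rewritten as "(a - b) == 0" so that later comparison elimination can reuse
// the condition code set by the subtraction itself.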
3078
3079// Check whether C compares a floating-point value with zero and if that
3080// floating-point value is also negated. In this case we can use the
3081// negation to set CC, so avoiding separate LOAD AND TEST and
3082// LOAD (NEGATIVE/COMPLEMENT) instructions.
3083static void adjustForFNeg(Comparison &C) {
3084 // This optimization is invalid for strict comparisons, since FNEG
3085 // does not raise any exceptions.
3086 if (C.Chain)
3087 return;
3088 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
3089 if (C1 && C1->isZero()) {
3090 for (SDNode *N : C.Op0->users()) {
3091 if (N->getOpcode() == ISD::FNEG) {
3092 C.Op0 = SDValue(N, 0);
3093 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3094 return;
3095 }
3096 }
3097 }
3098}
3099
3100// Check whether C compares (shl X, 32) with 0 and whether X is
3101// also sign-extended. In that case it is better to test the result
3102// of the sign extension using LTGFR.
3103//
3104// This case is important because InstCombine transforms a comparison
3105// with (sext (trunc X)) into a comparison with (shl X, 32).
3106static void adjustForLTGFR(Comparison &C) {
3107 // Check for a comparison between (shl X, 32) and 0.
3108 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
3109 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
3110 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3111 if (C1 && C1->getZExtValue() == 32) {
3112 SDValue ShlOp0 = C.Op0.getOperand(0);
3113 // See whether X has any SIGN_EXTEND_INREG uses.
3114 for (SDNode *N : ShlOp0->users()) {
3115 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
3116 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
3117 C.Op0 = SDValue(N, 0);
3118 return;
3119 }
3120 }
3121 }
3122 }
3123}
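// For example, for "icmp (shl X, 32), 0" where X also feeds a
// sign_extend_inreg from i32, both values are zero exactly when the low 32
// bits of X are zero and negative exactly when bit 31 of X is set, so the
// comparison is redirected to the extension result and can use LTGFR.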
3124
3125// If C compares the truncation of an extending load, try to compare
3126// the untruncated value instead. This exposes more opportunities to
3127// reuse CC.
3128static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
3129 Comparison &C) {
3130 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
3131 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
3132 C.Op1.getOpcode() == ISD::Constant &&
3133 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3134 C.Op1->getAsZExtVal() == 0) {
3135 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
3136 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
3137 C.Op0.getValueSizeInBits().getFixedValue()) {
3138 unsigned Type = L->getExtensionType();
3139 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
3140 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
3141 C.Op0 = C.Op0.getOperand(0);
3142 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
3143 }
3144 }
3145 }
3146}
3147
3148// Return true if shift operation N has an in-range constant shift value.
3149// Store it in ShiftVal if so.
3150static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
3151 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
3152 if (!Shift)
3153 return false;
3154
3155 uint64_t Amount = Shift->getZExtValue();
3156 if (Amount >= N.getValueSizeInBits())
3157 return false;
3158
3159 ShiftVal = Amount;
3160 return true;
3161}
3162
3163// Check whether an AND with Mask is suitable for a TEST UNDER MASK
3164// instruction and whether the CC value is descriptive enough to handle
3165// a comparison of type Opcode between the AND result and CmpVal.
3166// CCMask says which comparison result is being tested and BitSize is
3167// the number of bits in the operands. If TEST UNDER MASK can be used,
3168// return the corresponding CC mask, otherwise return 0.
3169static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
3170 uint64_t Mask, uint64_t CmpVal,
3171 unsigned ICmpType) {
3172 assert(Mask != 0 && "ANDs with zero should have been removed by now");
3173
3174 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
3175 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
3176 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
3177 return 0;
3178
3179 // Work out the masks for the lowest and highest bits.
3180   uint64_t High = llvm::bit_floor(Mask);
3181   uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
3182
3183 // Signed ordered comparisons are effectively unsigned if the sign
3184 // bit is dropped.
3185 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
3186
3187 // Check for equality comparisons with 0, or the equivalent.
3188 if (CmpVal == 0) {
3189     if (CCMask == SystemZ::CCMASK_CMP_EQ)
3190       return SystemZ::CCMASK_TM_ALL_0;
3191     if (CCMask == SystemZ::CCMASK_CMP_NE)
3192       return SystemZ::CCMASK_TM_SOME_1;
3193 }
3194 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
3195     if (CCMask == SystemZ::CCMASK_CMP_LT)
3196       return SystemZ::CCMASK_TM_ALL_0;
3197     if (CCMask == SystemZ::CCMASK_CMP_GE)
3198       return SystemZ::CCMASK_TM_SOME_1;
3199 }
3200 if (EffectivelyUnsigned && CmpVal < Low) {
3201     if (CCMask == SystemZ::CCMASK_CMP_LE)
3202       return SystemZ::CCMASK_TM_ALL_0;
3203     if (CCMask == SystemZ::CCMASK_CMP_GT)
3204       return SystemZ::CCMASK_TM_SOME_1;
3205 }
3206
3207 // Check for equality comparisons with the mask, or the equivalent.
3208 if (CmpVal == Mask) {
3209     if (CCMask == SystemZ::CCMASK_CMP_EQ)
3210       return SystemZ::CCMASK_TM_ALL_1;
3211     if (CCMask == SystemZ::CCMASK_CMP_NE)
3212       return SystemZ::CCMASK_TM_SOME_0;
3213 }
3214 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
3215     if (CCMask == SystemZ::CCMASK_CMP_GT)
3216       return SystemZ::CCMASK_TM_ALL_1;
3217     if (CCMask == SystemZ::CCMASK_CMP_LE)
3218       return SystemZ::CCMASK_TM_SOME_0;
3219 }
3220 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
3221     if (CCMask == SystemZ::CCMASK_CMP_GE)
3222       return SystemZ::CCMASK_TM_ALL_1;
3223     if (CCMask == SystemZ::CCMASK_CMP_LT)
3224       return SystemZ::CCMASK_TM_SOME_0;
3225 }
3226
3227 // Check for ordered comparisons with the top bit.
3228 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
3229     if (CCMask == SystemZ::CCMASK_CMP_LE)
3230       return SystemZ::CCMASK_TM_MSB_0;
3231     if (CCMask == SystemZ::CCMASK_CMP_GT)
3232       return SystemZ::CCMASK_TM_MSB_1;
3233 }
3234 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
3235     if (CCMask == SystemZ::CCMASK_CMP_LT)
3236       return SystemZ::CCMASK_TM_MSB_0;
3237     if (CCMask == SystemZ::CCMASK_CMP_GE)
3238       return SystemZ::CCMASK_TM_MSB_1;
3239 }
3240
3241 // If there are just two bits, we can do equality checks for Low and High
3242 // as well.
3243 if (Mask == Low + High) {
3244     if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
3245       return SystemZ::CCMASK_TM_MIXED_MSB_0;
3246     if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
3247       return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
3248     if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
3249       return SystemZ::CCMASK_TM_MIXED_MSB_1;
3250     if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
3251       return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
3252 }
3253
3254 // Looks like we've exhausted our options.
3255 return 0;
3256}
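// Worked example: for Mask = 0x0f00, "(X & Mask) == 0" with CCMASK_CMP_EQ maps
// to CCMASK_TM_ALL_0 (all selected bits zero) and "(X & Mask) == Mask" maps to
// CCMASK_TM_ALL_1 (all selected bits one); the mask fits in the low 16 bits,
// so it passes the isImmLL test above.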
3257
3258// See whether C can be implemented as a TEST UNDER MASK instruction.
3259// Update the arguments with the TM version if so.
3260 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
3261                                    Comparison &C) {
3262 // Use VECTOR TEST UNDER MASK for i128 operations.
3263 if (C.Op0.getValueType() == MVT::i128) {
3264 // We can use VTM for EQ/NE comparisons of x & y against 0.
3265 if (C.Op0.getOpcode() == ISD::AND &&
3266 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3267 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
3268 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
3269 if (Mask && Mask->getAPIntValue() == 0) {
3270 C.Opcode = SystemZISD::VTM;
3271 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
3272 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
3273 C.CCValid = SystemZ::CCMASK_VCMP;
3274 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3275 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3276 else
3277 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3278 }
3279 }
3280 return;
3281 }
3282
3283 // Check that we have a comparison with a constant.
3284 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3285 if (!ConstOp1)
3286 return;
3287 uint64_t CmpVal = ConstOp1->getZExtValue();
3288
3289 // Check whether the nonconstant input is an AND with a constant mask.
3290 Comparison NewC(C);
3291 uint64_t MaskVal;
3292 ConstantSDNode *Mask = nullptr;
3293 if (C.Op0.getOpcode() == ISD::AND) {
3294 NewC.Op0 = C.Op0.getOperand(0);
3295 NewC.Op1 = C.Op0.getOperand(1);
3296 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
3297 if (!Mask)
3298 return;
3299 MaskVal = Mask->getZExtValue();
3300 } else {
3301 // There is no instruction to compare with a 64-bit immediate
3302 // so use TMHH instead if possible. We need an unsigned ordered
3303 // comparison with an i64 immediate.
3304 if (NewC.Op0.getValueType() != MVT::i64 ||
3305 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
3306 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
3307 NewC.ICmpType == SystemZICMP::SignedOnly)
3308 return;
3309 // Convert LE and GT comparisons into LT and GE.
3310 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
3311 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
3312 if (CmpVal == uint64_t(-1))
3313 return;
3314 CmpVal += 1;
3315 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
3316 }
3317   // If the low N bits of Op1 are zero then the low N bits of Op0 can
3318 // be masked off without changing the result.
3319 MaskVal = -(CmpVal & -CmpVal);
3320 NewC.ICmpType = SystemZICMP::UnsignedOnly;
3321 }
3322 if (!MaskVal)
3323 return;
3324
3325 // Check whether the combination of mask, comparison value and comparison
3326 // type are suitable.
3327 unsigned BitSize = NewC.Op0.getValueSizeInBits();
3328 unsigned NewCCMask, ShiftVal;
3329 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3330 NewC.Op0.getOpcode() == ISD::SHL &&
3331 isSimpleShift(NewC.Op0, ShiftVal) &&
3332 (MaskVal >> ShiftVal != 0) &&
3333 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
3334 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3335 MaskVal >> ShiftVal,
3336 CmpVal >> ShiftVal,
3337 SystemZICMP::Any))) {
3338 NewC.Op0 = NewC.Op0.getOperand(0);
3339 MaskVal >>= ShiftVal;
3340 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3341 NewC.Op0.getOpcode() == ISD::SRL &&
3342 isSimpleShift(NewC.Op0, ShiftVal) &&
3343 (MaskVal << ShiftVal != 0) &&
3344 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
3345 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3346 MaskVal << ShiftVal,
3347                                              CmpVal << ShiftVal,
3348                                              SystemZICMP::UnsignedOnly))) {
3349 NewC.Op0 = NewC.Op0.getOperand(0);
3350 MaskVal <<= ShiftVal;
3351 } else {
3352 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
3353 NewC.ICmpType);
3354 if (!NewCCMask)
3355 return;
3356 }
3357
3358 // Go ahead and make the change.
3359 C.Opcode = SystemZISD::TM;
3360 C.Op0 = NewC.Op0;
3361 if (Mask && Mask->getZExtValue() == MaskVal)
3362 C.Op1 = SDValue(Mask, 0);
3363 else
3364 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
3365 C.CCValid = SystemZ::CCMASK_TM;
3366 C.CCMask = NewCCMask;
3367}
3368
3369// Implement i128 comparison in vector registers.
3370static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
3371 Comparison &C) {
3372 if (C.Opcode != SystemZISD::ICMP)
3373 return;
3374 if (C.Op0.getValueType() != MVT::i128)
3375 return;
3376
3377 // Recognize vector comparison reductions.
3378 if ((C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3379 C.CCMask == SystemZ::CCMASK_CMP_NE) &&
3380 (isNullConstant(C.Op1) || isAllOnesConstant(C.Op1))) {
3381 bool CmpEq = C.CCMask == SystemZ::CCMASK_CMP_EQ;
3382 bool CmpNull = isNullConstant(C.Op1);
3383 SDValue Src = peekThroughBitcasts(C.Op0);
3384 if (Src.hasOneUse() && isBitwiseNot(Src)) {
3385 Src = Src.getOperand(0);
3386 CmpNull = !CmpNull;
3387 }
3388 unsigned Opcode = 0;
3389 if (Src.hasOneUse()) {
3390 switch (Src.getOpcode()) {
3391 case SystemZISD::VICMPE: Opcode = SystemZISD::VICMPES; break;
3392 case SystemZISD::VICMPH: Opcode = SystemZISD::VICMPHS; break;
3393 case SystemZISD::VICMPHL: Opcode = SystemZISD::VICMPHLS; break;
3394 case SystemZISD::VFCMPE: Opcode = SystemZISD::VFCMPES; break;
3395 case SystemZISD::VFCMPH: Opcode = SystemZISD::VFCMPHS; break;
3396 case SystemZISD::VFCMPHE: Opcode = SystemZISD::VFCMPHES; break;
3397 default: break;
3398 }
3399 }
3400 if (Opcode) {
3401 C.Opcode = Opcode;
3402 C.Op0 = Src->getOperand(0);
3403 C.Op1 = Src->getOperand(1);
3404       C.CCValid = SystemZ::CCMASK_VCMP;
3405       C.CCMask = CmpNull ? SystemZ::CCMASK_VCMP_NONE : SystemZ::CCMASK_VCMP_ALL;
3406 if (!CmpEq)
3407 C.CCMask ^= C.CCValid;
3408 return;
3409 }
3410 }
3411
3412 // Everything below here is not useful if we have native i128 compares.
3413 if (DAG.getSubtarget<SystemZSubtarget>().hasVectorEnhancements3())
3414 return;
3415
3416 // (In-)Equality comparisons can be implemented via VCEQGS.
3417 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3418 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3419 C.Opcode = SystemZISD::VICMPES;
3420 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
3421 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
3422 C.CCValid = SystemZ::CCMASK_VCMP;
3423 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3424 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3425 else
3426 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3427 return;
3428 }
3429
3430 // Normalize other comparisons to GT.
3431 bool Swap = false, Invert = false;
3432 switch (C.CCMask) {
3433 case SystemZ::CCMASK_CMP_GT: break;
3434 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
3435 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
3436 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
3437 default: llvm_unreachable("Invalid integer condition!");
3438 }
3439 if (Swap)
3440 std::swap(C.Op0, C.Op1);
3441
3442 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3443 C.Opcode = SystemZISD::UCMP128HI;
3444 else
3445 C.Opcode = SystemZISD::SCMP128HI;
3446 C.CCValid = SystemZ::CCMASK_ANY;
3447 C.CCMask = SystemZ::CCMASK_1;
3448
3449 if (Invert)
3450 C.CCMask ^= C.CCValid;
3451}
3452
3453// See whether the comparison argument contains a redundant AND
3454// and remove it if so. This sometimes happens due to the generic
3455// BRCOND expansion.
3456 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
3457                                   Comparison &C) {
3458 if (C.Op0.getOpcode() != ISD::AND)
3459 return;
3460 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3461 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3462 return;
3463 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3464 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3465 return;
3466
3467 C.Op0 = C.Op0.getOperand(0);
3468}
3469
3470// Return a Comparison that tests the condition-code result of intrinsic
3471// node Call against constant integer CC using comparison code Cond.
3472// Opcode is the opcode of the SystemZISD operation for the intrinsic
3473// and CCValid is the set of possible condition-code results.
3474static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3475                                   SDValue Call, unsigned CCValid, uint64_t CC,
3476                                   ISD::CondCode Cond) {
3477 Comparison C(Call, SDValue(), SDValue());
3478 C.Opcode = Opcode;
3479 C.CCValid = CCValid;
3480 if (Cond == ISD::SETEQ)
3481 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3482 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3483 else if (Cond == ISD::SETNE)
3484 // ...and the inverse of that.
3485 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3486 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3487 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3488 // always true for CC>3.
3489 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3490 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3491 // ...and the inverse of that.
3492 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3493 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3494 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3495 // always true for CC>3.
3496 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3497 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3498 // ...and the inverse of that.
3499 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3500 else
3501 llvm_unreachable("Unexpected integer comparison type");
3502 C.CCMask &= CCValid;
3503 return C;
3504}
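// Worked example: the result mask uses bit 3 for CC==0 down to bit 0 for
// CC==3, so SETEQ against CC==1 gives 1 << (3 - 1) = 0b0100, and SETULT
// against CC==2 gives ~0U << (4 - 2), i.e. the bits for CC==0 and CC==1,
// before the final AND with CCValid.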
3505
3506 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3507static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3508 ISD::CondCode Cond, const SDLoc &DL,
3509 SDValue Chain = SDValue(),
3510 bool IsSignaling = false) {
3511 if (CmpOp1.getOpcode() == ISD::Constant) {
3512 assert(!Chain);
3513 unsigned Opcode, CCValid;
3514 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3515 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3516 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3517 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3518 CmpOp1->getAsZExtVal(), Cond);
3519 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3520 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3521 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3522 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3523 CmpOp1->getAsZExtVal(), Cond);
3524 }
3525 Comparison C(CmpOp0, CmpOp1, Chain);
3526 C.CCMask = CCMaskForCondCode(Cond);
3527 if (C.Op0.getValueType().isFloatingPoint()) {
3528 C.CCValid = SystemZ::CCMASK_FCMP;
3529 if (!C.Chain)
3530 C.Opcode = SystemZISD::FCMP;
3531 else if (!IsSignaling)
3532 C.Opcode = SystemZISD::STRICT_FCMP;
3533 else
3534       C.Opcode = SystemZISD::STRICT_FCMPS;
3535     adjustForFNeg(C);
3536 } else {
3537 assert(!C.Chain);
3538 C.CCValid = SystemZ::CCMASK_ICMP;
3539 C.Opcode = SystemZISD::ICMP;
3540 // Choose the type of comparison. Equality and inequality tests can
3541 // use either signed or unsigned comparisons. The choice also doesn't
3542 // matter if both sign bits are known to be clear. In those cases we
3543 // want to give the main isel code the freedom to choose whichever
3544 // form fits best.
3545 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3546 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3547 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3548 C.ICmpType = SystemZICMP::Any;
3549 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3550 C.ICmpType = SystemZICMP::UnsignedOnly;
3551 else
3552 C.ICmpType = SystemZICMP::SignedOnly;
3553 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3554 adjustForRedundantAnd(DAG, DL, C);
3555 adjustZeroCmp(DAG, DL, C);
3556 adjustSubwordCmp(DAG, DL, C);
3557     adjustForSubtraction(DAG, DL, C);
3558     adjustForLTGFR(C);
3559 adjustICmpTruncate(DAG, DL, C);
3560 }
3561
3562 if (shouldSwapCmpOperands(C)) {
3563 std::swap(C.Op0, C.Op1);
3564 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3565 }
3566
3567   adjustForTestUnderMask(DAG, DL, C);
3568   adjustICmp128(DAG, DL, C);
3569 return C;
3570}
3571
3572// Emit the comparison instruction described by C.
3573static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3574 if (!C.Op1.getNode()) {
3575 SDNode *Node;
3576     switch (C.Op0.getOpcode()) {
3577     case ISD::INTRINSIC_W_CHAIN:
3578       Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3579       return SDValue(Node, 0);
3580     case ISD::INTRINSIC_WO_CHAIN:
3581       Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3582 return SDValue(Node, Node->getNumValues() - 1);
3583 default:
3584 llvm_unreachable("Invalid comparison operands");
3585 }
3586 }
3587 if (C.Opcode == SystemZISD::ICMP)
3588 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3589 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3590 if (C.Opcode == SystemZISD::TM) {
3591     bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3592                          bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
3593 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3594 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3595 }
3596 if (C.Opcode == SystemZISD::VICMPES ||
3597 C.Opcode == SystemZISD::VICMPHS ||
3598 C.Opcode == SystemZISD::VICMPHLS ||
3599 C.Opcode == SystemZISD::VFCMPES ||
3600 C.Opcode == SystemZISD::VFCMPHS ||
3601 C.Opcode == SystemZISD::VFCMPHES) {
3602 EVT IntVT = C.Op0.getValueType().changeVectorElementTypeToInteger();
3603 SDVTList VTs = DAG.getVTList(IntVT, MVT::i32);
3604 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3605 return SDValue(Val.getNode(), 1);
3606 }
3607 if (C.Chain) {
3608 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3609 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3610 }
3611 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3612}
3613
3614// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3615// 64 bits. Extend is the extension type to use. Store the high part
3616// in Hi and the low part in Lo.
3617static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3618 SDValue Op0, SDValue Op1, SDValue &Hi,
3619 SDValue &Lo) {
3620 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3621 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3622 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3623 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3624 DAG.getConstant(32, DL, MVT::i64));
3625 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3626 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3627}
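// Worked example with zero extension: 0xFFFFFFFF * 0x00000002 extends to the
// 64-bit product 0x1FFFFFFFE, so Hi receives 0x00000001 (the product shifted
// right by 32) and Lo receives 0xFFFFFFFE (the truncated low half).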
3628
3629// Lower a binary operation that produces two VT results, one in each
3630// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3631// and Opcode performs the GR128 operation. Store the even register result
3632// in Even and the odd register result in Odd.
3633static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3634 unsigned Opcode, SDValue Op0, SDValue Op1,
3635 SDValue &Even, SDValue &Odd) {
3636 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3637 bool Is32Bit = is32Bit(VT);
3638 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3639 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3640}
3641
3642// Return an i32 value that is 1 if the CC value produced by CCReg is
3643// in the mask CCMask and 0 otherwise. CC is known to have a value
3644// in CCValid, so other values can be ignored.
3645static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3646 unsigned CCValid, unsigned CCMask) {
3647 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3648 DAG.getConstant(0, DL, MVT::i32),
3649 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3650 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3651 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3652}
3653
3654 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3655// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3656// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3657// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3658 // floating-point comparisons.
3659 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
3660 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
3661   switch (CC) {
3662 case ISD::SETOEQ:
3663 case ISD::SETEQ:
3664 switch (Mode) {
3665 case CmpMode::Int: return SystemZISD::VICMPE;
3666 case CmpMode::FP: return SystemZISD::VFCMPE;
3667 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3668 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3669 }
3670 llvm_unreachable("Bad mode");
3671
3672 case ISD::SETOGE:
3673 case ISD::SETGE:
3674 switch (Mode) {
3675 case CmpMode::Int: return 0;
3676 case CmpMode::FP: return SystemZISD::VFCMPHE;
3677 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3678 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3679 }
3680 llvm_unreachable("Bad mode");
3681
3682 case ISD::SETOGT:
3683 case ISD::SETGT:
3684 switch (Mode) {
3685 case CmpMode::Int: return SystemZISD::VICMPH;
3686 case CmpMode::FP: return SystemZISD::VFCMPH;
3687 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3688 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3689 }
3690 llvm_unreachable("Bad mode");
3691
3692 case ISD::SETUGT:
3693 switch (Mode) {
3694 case CmpMode::Int: return SystemZISD::VICMPHL;
3695 case CmpMode::FP: return 0;
3696 case CmpMode::StrictFP: return 0;
3697 case CmpMode::SignalingFP: return 0;
3698 }
3699 llvm_unreachable("Bad mode");
3700
3701 default:
3702 return 0;
3703 }
3704}
3705
3706// Return the SystemZISD vector comparison operation for CC or its inverse,
3707// or 0 if neither can be done directly. Indicate in Invert whether the
3708// result is for the inverse of CC. Mode is as above.
3709 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3710                                             bool &Invert) {
3711 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3712 Invert = false;
3713 return Opcode;
3714 }
3715
3716 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3717 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3718 Invert = true;
3719 return Opcode;
3720 }
3721
3722 return 0;
3723}
3724
3725// Return a v2f64 that contains the extended form of elements Start and Start+1
3726// of v4f32 value Op. If Chain is nonnull, return the strict form.
3727static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3728 SDValue Op, SDValue Chain) {
3729 int Mask[] = { Start, -1, Start + 1, -1 };
3730 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3731 if (Chain) {
3732 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3733 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3734 }
3735 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3736}
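// To illustrate the shuffle above (element names are for exposition only):
// with Start == 2 the mask { 2, -1, 3, -1 } turns <a, b, c, d> into
// <c, undef, d, undef>, so the two requested floats end up in the
// even-numbered lanes that the (STRICT_)VEXTEND node widens into v2f64.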
3737
3738// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3739// producing a result of type VT. If Chain is nonnull, return the strict form.
3740SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3741 const SDLoc &DL, EVT VT,
3742 SDValue CmpOp0,
3743 SDValue CmpOp1,
3744 SDValue Chain) const {
3745 // There is no hardware support for v4f32 (unless we have the vector
3746 // enhancements facility 1), so extend the vector into two v2f64s
3747 // and compare those.
3748 if (CmpOp0.getValueType() == MVT::v4f32 &&
3749 !Subtarget.hasVectorEnhancements1()) {
3750 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3751 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3752 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3753 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3754 if (Chain) {
3755 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3756 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3757 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3758 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3759 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3760 H1.getValue(1), L1.getValue(1),
3761 HRes.getValue(1), LRes.getValue(1) };
3762 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3763 SDValue Ops[2] = { Res, NewChain };
3764 return DAG.getMergeValues(Ops, DL);
3765 }
3766 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3767 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3768 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3769 }
3770 if (Chain) {
3771 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3772 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3773 }
3774 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3775}
3776
3777// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3778// an integer mask of type VT. If Chain is nonnull, we have a strict
3779// floating-point comparison. If in addition IsSignaling is true, we have
3780// a strict signaling floating-point comparison.
3781SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3782 const SDLoc &DL, EVT VT,
3783 ISD::CondCode CC,
3784 SDValue CmpOp0,
3785 SDValue CmpOp1,
3786 SDValue Chain,
3787 bool IsSignaling) const {
3788 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3789 assert (!Chain || IsFP);
3790 assert (!IsSignaling || Chain);
3791 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3792 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3793 bool Invert = false;
3794 SDValue Cmp;
3795 switch (CC) {
3796 // Handle tests for order using (or (ogt y x) (oge x y)).
3797 case ISD::SETUO:
3798 Invert = true;
3799 [[fallthrough]];
3800 case ISD::SETO: {
3801 assert(IsFP && "Unexpected integer comparison");
3802 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3803 DL, VT, CmpOp1, CmpOp0, Chain);
3804 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3805 DL, VT, CmpOp0, CmpOp1, Chain);
3806 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3807 if (Chain)
3808 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3809 LT.getValue(1), GE.getValue(1));
3810 break;
3811 }
3812
3813 // Handle <> tests using (or (ogt y x) (ogt x y)).
3814 case ISD::SETUEQ:
3815 Invert = true;
3816 [[fallthrough]];
3817 case ISD::SETONE: {
3818 assert(IsFP && "Unexpected integer comparison");
3819 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3820 DL, VT, CmpOp1, CmpOp0, Chain);
3821 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3822 DL, VT, CmpOp0, CmpOp1, Chain);
3823 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3824 if (Chain)
3825 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3826 LT.getValue(1), GT.getValue(1));
3827 break;
3828 }
3829
3830 // Otherwise a single comparison is enough. It doesn't really
3831 // matter whether we try the inversion or the swap first, since
3832 // there are no cases where both work.
3833 default:
3834 // Optimize sign-bit comparisons to signed compares.
3835 if (Mode == CmpMode::Int && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
3836     ISD::isConstantSplatVectorAllZeros(CmpOp1.getNode())) {
3837 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3838 APInt Mask;
3839 if (CmpOp0.getOpcode() == ISD::AND
3840 && ISD::isConstantSplatVector(CmpOp0.getOperand(1).getNode(), Mask)
3841 && Mask == APInt::getSignMask(EltSize)) {
3842 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
3843 CmpOp0 = CmpOp0.getOperand(0);
3844 }
3845 }
3846 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3847 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3848 else {
3849   CC = ISD::getSetCCSwappedOperands(CC);
3850 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3851 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3852 else
3853 llvm_unreachable("Unhandled comparison");
3854 }
3855 if (Chain)
3856 Chain = Cmp.getValue(1);
3857 break;
3858 }
3859 if (Invert) {
3860 SDValue Mask =
3861 DAG.getSplatBuildVector(VT, DL, DAG.getAllOnesConstant(DL, MVT::i64));
3862 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3863 }
3864 if (Chain && Chain.getNode() != Cmp.getNode()) {
3865 SDValue Ops[2] = { Cmp, Chain };
3866 Cmp = DAG.getMergeValues(Ops, DL);
3867 }
3868 return Cmp;
3869}
3870
3871SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3872 SelectionDAG &DAG) const {
3873 SDValue CmpOp0 = Op.getOperand(0);
3874 SDValue CmpOp1 = Op.getOperand(1);
3875 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3876 SDLoc DL(Op);
3877 EVT VT = Op.getValueType();
3878 if (VT.isVector())
3879 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3880
3881 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3882 SDValue CCReg = emitCmp(DAG, DL, C);
3883 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3884}
3885
3886SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3887 SelectionDAG &DAG,
3888 bool IsSignaling) const {
3889 SDValue Chain = Op.getOperand(0);
3890 SDValue CmpOp0 = Op.getOperand(1);
3891 SDValue CmpOp1 = Op.getOperand(2);
3892 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3893 SDLoc DL(Op);
3894 EVT VT = Op.getNode()->getValueType(0);
3895 if (VT.isVector()) {
3896 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3897 Chain, IsSignaling);
3898 return Res.getValue(Op.getResNo());
3899 }
3900
3901 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3902 SDValue CCReg = emitCmp(DAG, DL, C);
3903 CCReg->setFlags(Op->getFlags());
3904 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3905 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3906 return DAG.getMergeValues(Ops, DL);
3907}
3908
3909SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3910 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3911 SDValue CmpOp0 = Op.getOperand(2);
3912 SDValue CmpOp1 = Op.getOperand(3);
3913 SDValue Dest = Op.getOperand(4);
3914 SDLoc DL(Op);
3915
3916 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3917 SDValue CCReg = emitCmp(DAG, DL, C);
3918 return DAG.getNode(
3919 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3920 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3921 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3922}
3923
3924// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3925// allowing Pos and Neg to be wider than CmpOp.
3926static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3927 return (Neg.getOpcode() == ISD::SUB &&
3928 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3929 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3930 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3931 Pos.getOperand(0) == CmpOp)));
3932}
3933
3934// Return the absolute or negative absolute of Op; IsNegative decides which.
3935 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3936                            bool IsNegative) {
3937 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3938 if (IsNegative)
3939 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3940 DAG.getConstant(0, DL, Op.getValueType()), Op);
3941 return Op;
3942}
3943
3944 static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL,
3945                              Comparison C, SDValue TrueOp, SDValue FalseOp) {
3946 EVT VT = MVT::i128;
3947 unsigned Op;
3948
3949 if (C.CCMask == SystemZ::CCMASK_CMP_NE ||
3950 C.CCMask == SystemZ::CCMASK_CMP_GE ||
3951 C.CCMask == SystemZ::CCMASK_CMP_LE) {
3952 std::swap(TrueOp, FalseOp);
3953 C.CCMask ^= C.CCValid;
3954 }
3955 if (C.CCMask == SystemZ::CCMASK_CMP_LT) {
3956 std::swap(C.Op0, C.Op1);
3957 C.CCMask = SystemZ::CCMASK_CMP_GT;
3958 }
3959 switch (C.CCMask) {
3960 case SystemZ::CCMASK_CMP_EQ:
3961   Op = SystemZISD::VICMPE;
3962 break;
3963 case SystemZ::CCMASK_CMP_GT:
3964   if (C.ICmpType == SystemZICMP::UnsignedOnly)
3965 Op = SystemZISD::VICMPHL;
3966 else
3967 Op = SystemZISD::VICMPH;
3968 break;
3969 default:
3970 llvm_unreachable("Unhandled comparison");
3971 break;
3972 }
3973
3974 SDValue Mask = DAG.getNode(Op, DL, VT, C.Op0, C.Op1);
3975 TrueOp = DAG.getNode(ISD::AND, DL, VT, TrueOp, Mask);
3976 FalseOp = DAG.getNode(ISD::AND, DL, VT, FalseOp, DAG.getNOT(DL, Mask, VT));
3977 return DAG.getNode(ISD::OR, DL, VT, TrueOp, FalseOp);
3978}
3979
3980SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3981 SelectionDAG &DAG) const {
3982 SDValue CmpOp0 = Op.getOperand(0);
3983 SDValue CmpOp1 = Op.getOperand(1);
3984 SDValue TrueOp = Op.getOperand(2);
3985 SDValue FalseOp = Op.getOperand(3);
3986 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3987 SDLoc DL(Op);
3988
3989 // SELECT_CC involving f16 will not have the cmp-ops promoted by the
3990 // legalizer, as it will be handled according to the type of the resulting
3991 // value. Extend them here if needed.
3992 if (CmpOp0.getSimpleValueType() == MVT::f16) {
3993 CmpOp0 = DAG.getFPExtendOrRound(CmpOp0, SDLoc(CmpOp0), MVT::f32);
3994 CmpOp1 = DAG.getFPExtendOrRound(CmpOp1, SDLoc(CmpOp1), MVT::f32);
3995 }
3996
3997 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3998
3999 // Check for absolute and negative-absolute selections, including those
4000 // where the comparison value is sign-extended (for LPGFR and LNGFR).
4001 // This check supplements the one in DAGCombiner.
4002 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
4003 C.CCMask != SystemZ::CCMASK_CMP_NE &&
4004 C.Op1.getOpcode() == ISD::Constant &&
4005 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
4006 C.Op1->getAsZExtVal() == 0) {
4007 if (isAbsolute(C.Op0, TrueOp, FalseOp))
4008 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
4009 if (isAbsolute(C.Op0, FalseOp, TrueOp))
4010 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
4011 }
4012
4013 if (Subtarget.hasVectorEnhancements3() &&
4014 C.Opcode == SystemZISD::ICMP &&
4015 C.Op0.getValueType() == MVT::i128 &&
4016 TrueOp.getValueType() == MVT::i128) {
4017 return getI128Select(DAG, DL, C, TrueOp, FalseOp);
4018 }
4019
4020 SDValue CCReg = emitCmp(DAG, DL, C);
4021 SDValue Ops[] = {TrueOp, FalseOp,
4022 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
4023 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
4024
4025 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
4026}
4027
4028SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
4029 SelectionDAG &DAG) const {
4030 SDLoc DL(Node);
4031 const GlobalValue *GV = Node->getGlobal();
4032 int64_t Offset = Node->getOffset();
4033 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4034 SDValue Result;
4035 
4036 CodeModel::Model CM = DAG.getTarget().getCodeModel();
4037 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
4038 if (isInt<32>(Offset)) {
4039 // Assign anchors at 1<<12 byte boundaries.
4040 uint64_t Anchor = Offset & ~uint64_t(0xfff);
4041 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
4042 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4043
4044 // The offset can be folded into the address if it is aligned to a
4045 // halfword.
4046 Offset -= Anchor;
4047 if (Offset != 0 && (Offset & 1) == 0) {
4048 SDValue Full =
4049 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
4050 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
4051 Offset = 0;
4052 }
4053 } else {
4054 // Conservatively load a constant offset greater than 32 bits into a
4055 // register below.
4056 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
4057 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4058 }
4059 } else if (Subtarget.isTargetELF()) {
4060 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
4061 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4062 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4063                      MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4064 } else if (Subtarget.isTargetzOS()) {
4065 Result = getADAEntry(DAG, GV, DL, PtrVT);
4066 } else
4067 llvm_unreachable("Unexpected Subtarget");
4068
4069 // If there was a non-zero offset that we didn't fold, create an explicit
4070 // addition for it.
4071 if (Offset != 0)
4072 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4073 DAG.getSignedConstant(Offset, DL, PtrVT));
4074
4075 return Result;
4076}
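// A small worked example of the anchoring logic above (offsets chosen for
// illustration): a PC32DBL symbol with Offset == 0x12344 gets the anchor
// 0x12000, and the remaining 0x344 is halfword-aligned, so it is folded
// into a PCREL_OFFSET node and Offset becomes 0. With Offset == 0x12345
// the leftover 0x345 is odd, so it survives to the explicit ISD::ADD at
// the end of the function.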
4077
4078SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
4079 SelectionDAG &DAG,
4080 unsigned Opcode,
4081 SDValue GOTOffset) const {
4082 SDLoc DL(Node);
4083 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4084 SDValue Chain = DAG.getEntryNode();
4085 SDValue Glue;
4086 
4087 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
4088     CallingConv::GHC)
4089 report_fatal_error("In GHC calling convention TLS is not supported");
4090
4091 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
4092 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
4093 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
4094 Glue = Chain.getValue(1);
4095 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
4096 Glue = Chain.getValue(1);
4097
4098 // The first call operand is the chain and the second is the TLS symbol.
4099 SmallVector<SDValue, 8> Ops;
4100 Ops.push_back(Chain);
4101 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
4102 Node->getValueType(0),
4103 0, 0));
4104
4105 // Add argument registers to the end of the list so that they are
4106 // known live into the call.
4107 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
4108 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
4109
4110 // Add a register mask operand representing the call-preserved registers.
4111 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4112 const uint32_t *Mask =
4113 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
4114 assert(Mask && "Missing call preserved mask for calling convention");
4115 Ops.push_back(DAG.getRegisterMask(Mask));
4116
4117 // Glue the call to the argument copies.
4118 Ops.push_back(Glue);
4119
4120 // Emit the call.
4121 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4122 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
4123 Glue = Chain.getValue(1);
4124
4125 // Copy the return value from %r2.
4126 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
4127}
4128
4129SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
4130 SelectionDAG &DAG) const {
4131 SDValue Chain = DAG.getEntryNode();
4132 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4133
4134 // The high part of the thread pointer is in access register 0.
4135 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
4136 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
4137
4138 // The low part of the thread pointer is in access register 1.
4139 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
4140 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
4141
4142 // Merge them into a single 64-bit address.
4143 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
4144 DAG.getConstant(32, DL, PtrVT));
4145 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
4146}
4147
4148SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
4149 SelectionDAG &DAG) const {
4150 if (DAG.getTarget().useEmulatedTLS())
4151 return LowerToTLSEmulatedModel(Node, DAG);
4152 SDLoc DL(Node);
4153 const GlobalValue *GV = Node->getGlobal();
4154 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4155 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
4156 
4157 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
4158     CallingConv::GHC)
4159 report_fatal_error("In GHC calling convention TLS is not supported");
4160
4161 SDValue TP = lowerThreadPointer(DL, DAG);
4162
4163 // Get the offset of GA from the thread pointer, based on the TLS model.
4164 SDValue Offset;
4165 switch (model) {
4166 case TLSModel::GeneralDynamic: {
4167 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
4168 SystemZConstantPoolValue *CPV =
4169     SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
4170
4171 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4172 Offset = DAG.getLoad(
4173 PtrVT, DL, DAG.getEntryNode(), Offset,
4174 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4175
4176 // Call __tls_get_offset to retrieve the offset.
4177 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
4178 break;
4179 }
4180 
4181 case TLSModel::LocalDynamic: {
4182 // Load the GOT offset of the module ID.
4183 SystemZConstantPoolValue *CPV =
4184     SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
4185
4186 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4187 Offset = DAG.getLoad(
4188 PtrVT, DL, DAG.getEntryNode(), Offset,
4189 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4190
4191 // Call __tls_get_offset to retrieve the module base offset.
4192 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
4193
4194 // Note: The SystemZLDCleanupPass will remove redundant computations
4195 // of the module base offset. Count total number of local-dynamic
4196 // accesses to trigger execution of that pass.
4197 SystemZMachineFunctionInfo* MFI =
4198 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
4199 MFI->incNumLocalDynamicTLSAccesses();
4200
4201 // Add the per-symbol offset.
4202 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
4203
4204 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4205 DTPOffset = DAG.getLoad(
4206 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
4207 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4208
4209 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
4210 break;
4211 }
4212
4213 case TLSModel::InitialExec: {
4214 // Load the offset from the GOT.
4215 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
4216                                     SystemZII::MO_INDNTPOFF);
4217 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
4218 Offset =
4219 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
4220             MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4221 break;
4222 }
4223
4224 case TLSModel::LocalExec: {
4225 // Force the offset into the constant pool and load it from there.
4226 SystemZConstantPoolValue *CPV =
4227     SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
4228
4229 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4230 Offset = DAG.getLoad(
4231 PtrVT, DL, DAG.getEntryNode(), Offset,
4232 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4233 break;
4234 }
4235 }
4236
4237 // Add the base and offset together.
4238 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
4239}
4240
4241SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
4242 SelectionDAG &DAG) const {
4243 SDLoc DL(Node);
4244 const BlockAddress *BA = Node->getBlockAddress();
4245 int64_t Offset = Node->getOffset();
4246 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4247
4248 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
4249 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4250 return Result;
4251}
4252
4253SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
4254 SelectionDAG &DAG) const {
4255 SDLoc DL(JT);
4256 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4257 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
4258
4259 // Use LARL to load the address of the table.
4260 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4261}
4262
4263SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
4264 SelectionDAG &DAG) const {
4265 SDLoc DL(CP);
4266 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4267 
4268 SDValue Result;
4269 if (CP->isMachineConstantPoolEntry())
4270 Result =
4271 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
4272 else
4273 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
4274 CP->getOffset());
4275
4276 // Use LARL to load the address of the constant pool entry.
4277 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4278}
4279
4280SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
4281 SelectionDAG &DAG) const {
4282 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4283 MachineFunction &MF = DAG.getMachineFunction();
4284 MachineFrameInfo &MFI = MF.getFrameInfo();
4285 MFI.setFrameAddressIsTaken(true);
4286
4287 SDLoc DL(Op);
4288 unsigned Depth = Op.getConstantOperandVal(0);
4289 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4290
4291 // By definition, the frame address is the address of the back chain. (In
4292 // the case of packed stack without backchain, return the address where the
4293 // backchain would have been stored. This will either be an unused space or
4294 // contain a saved register).
4295 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
4296 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
4297
4298 if (Depth > 0) {
4299 // FIXME The frontend should detect this case.
4300 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4301 report_fatal_error("Unsupported stack frame traversal count");
4302
4303 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
4304 while (Depth--) {
4305 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
4306 MachinePointerInfo());
4307 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
4308 }
4309 }
4310
4311 return BackChain;
4312}
4313
4314SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
4315 SelectionDAG &DAG) const {
4316 MachineFunction &MF = DAG.getMachineFunction();
4317 MachineFrameInfo &MFI = MF.getFrameInfo();
4318 MFI.setReturnAddressIsTaken(true);
4319
4320 SDLoc DL(Op);
4321 unsigned Depth = Op.getConstantOperandVal(0);
4322 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4323
4324 if (Depth > 0) {
4325 // FIXME The frontend should detect this case.
4326 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4327 report_fatal_error("Unsupported stack frame traversal count");
4328
4329 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
4330 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4331 int Offset = TFL->getReturnAddressOffset(MF);
4332 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
4333 DAG.getSignedConstant(Offset, DL, PtrVT));
4334 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
4335 MachinePointerInfo());
4336 }
4337
4338 // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an
4339 // implicit live-in.
4340 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
4341 Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(),
4342                                 &SystemZ::GR64BitRegClass);
4343 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
4344}
4345
4346SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
4347 SelectionDAG &DAG) const {
4348 SDLoc DL(Op);
4349 SDValue In = Op.getOperand(0);
4350 EVT InVT = In.getValueType();
4351 EVT ResVT = Op.getValueType();
4352
4353 // Convert loads directly. This is normally done by DAGCombiner,
4354 // but we need this case for bitcasts that are created during lowering
4355 // and which are then lowered themselves.
4356 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
4357 if (ISD::isNormalLoad(LoadN)) {
4358 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
4359 LoadN->getBasePtr(), LoadN->getMemOperand());
4360 // Update the chain uses.
4361 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
4362 return NewLoad;
4363 }
4364
4365 if (InVT == MVT::i32 && ResVT == MVT::f32) {
4366 SDValue In64;
4367 if (Subtarget.hasHighWord()) {
4368 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
4369 MVT::i64);
4370 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4371 MVT::i64, SDValue(U64, 0), In);
4372 } else {
4373 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
4374 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
4375 DAG.getConstant(32, DL, MVT::i64));
4376 }
4377 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
4378 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
4379 DL, MVT::f32, Out64);
4380 }
4381 if (InVT == MVT::f32 && ResVT == MVT::i32) {
4382 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
4383 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4384 MVT::f64, SDValue(U64, 0), In);
4385 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
4386 if (Subtarget.hasHighWord())
4387 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
4388 MVT::i32, Out64);
4389 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
4390 DAG.getConstant(32, DL, MVT::i64));
4391 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
4392 }
4393 llvm_unreachable("Unexpected bitcast combination");
4394}
4395
4396SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
4397 SelectionDAG &DAG) const {
4398
4399 if (Subtarget.isTargetXPLINK64())
4400 return lowerVASTART_XPLINK(Op, DAG);
4401 else
4402 return lowerVASTART_ELF(Op, DAG);
4403}
4404
4405SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
4406 SelectionDAG &DAG) const {
4407 MachineFunction &MF = DAG.getMachineFunction();
4408 SystemZMachineFunctionInfo *FuncInfo =
4409 MF.getInfo<SystemZMachineFunctionInfo>();
4410
4411 SDLoc DL(Op);
4412
4413 // vastart just stores the address of the VarArgsFrameIndex slot into the
4414 // memory location argument.
4415 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4416 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4417 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4418 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4419 MachinePointerInfo(SV));
4420}
4421
4422SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
4423 SelectionDAG &DAG) const {
4424 MachineFunction &MF = DAG.getMachineFunction();
4425 SystemZMachineFunctionInfo *FuncInfo =
4426 MF.getInfo<SystemZMachineFunctionInfo>();
4427 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4428
4429 SDValue Chain = Op.getOperand(0);
4430 SDValue Addr = Op.getOperand(1);
4431 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4432 SDLoc DL(Op);
4433
4434 // The initial values of each field.
4435 const unsigned NumFields = 4;
4436 SDValue Fields[NumFields] = {
4437 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
4438 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
4439 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
4440 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
4441 };
4442
4443 // Store each field into its respective slot.
4444 SDValue MemOps[NumFields];
4445 unsigned Offset = 0;
4446 for (unsigned I = 0; I < NumFields; ++I) {
4447 SDValue FieldAddr = Addr;
4448 if (Offset != 0)
4449 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
4451 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
4452 MachinePointerInfo(SV, Offset));
4453 Offset += 8;
4454 }
4455 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4456}
4457
4458SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
4459 SelectionDAG &DAG) const {
4460 SDValue Chain = Op.getOperand(0);
4461 SDValue DstPtr = Op.getOperand(1);
4462 SDValue SrcPtr = Op.getOperand(2);
4463 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4464 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4465 SDLoc DL(Op);
4466
4467 uint32_t Sz =
4468 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
4469 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
4470 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
4471 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV),
4472 MachinePointerInfo(SrcSV));
4473}
4474
4475SDValue
4476SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
4477 SelectionDAG &DAG) const {
4478 if (Subtarget.isTargetXPLINK64())
4479 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
4480 else
4481 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
4482}
4483
4484SDValue
4485SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
4486 SelectionDAG &DAG) const {
4487 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4488 MachineFunction &MF = DAG.getMachineFunction();
4489 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4490 SDValue Chain = Op.getOperand(0);
4491 SDValue Size = Op.getOperand(1);
4492 SDValue Align = Op.getOperand(2);
4493 SDLoc DL(Op);
4494
4495 // If user has set the no alignment function attribute, ignore
4496 // alloca alignments.
4497 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4498
4499 uint64_t StackAlign = TFI->getStackAlignment();
4500 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4501 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4502
4503 SDValue NeededSpace = Size;
4504
4505 // Add extra space for alignment if needed.
4506 EVT PtrVT = getPointerTy(MF.getDataLayout());
4507 if (ExtraAlignSpace)
4508 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
4509 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4510
4511 bool IsSigned = false;
4512 bool DoesNotReturn = false;
4513 bool IsReturnValueUsed = false;
4514 EVT VT = Op.getValueType();
4515 SDValue AllocaCall =
4516 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4517 CallingConv::C, IsSigned, DL, DoesNotReturn,
4518 IsReturnValueUsed)
4519 .first;
4520
4521 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4522 // to end of call in order to ensure it isn't broken up from the call
4523 // sequence.
4524 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4525 Register SPReg = Regs.getStackPointerRegister();
4526 Chain = AllocaCall.getValue(1);
4527 SDValue Glue = AllocaCall.getValue(2);
4528 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4529 Chain = NewSPRegNode.getValue(1);
4530
4531 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4532 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4533 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4534
4535 // Dynamically realign if needed.
4536 if (ExtraAlignSpace) {
4537 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4538 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4539 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4540 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4541 }
4542
4543 SDValue Ops[2] = {Result, Chain};
4544 return DAG.getMergeValues(Ops, DL);
4545}
4546
4547SDValue
4548SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4549 SelectionDAG &DAG) const {
4550 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4551 MachineFunction &MF = DAG.getMachineFunction();
4552 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4553 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4554
4555 SDValue Chain = Op.getOperand(0);
4556 SDValue Size = Op.getOperand(1);
4557 SDValue Align = Op.getOperand(2);
4558 SDLoc DL(Op);
4559
4560 // If user has set the no alignment function attribute, ignore
4561 // alloca alignments.
4562 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4563
4564 uint64_t StackAlign = TFI->getStackAlignment();
4565 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4566 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4567
4569 SDValue NeededSpace = Size;
4570
4571 // Get a reference to the stack pointer.
4572 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4573
4574 // If we need a backchain, save it now.
4575 SDValue Backchain;
4576 if (StoreBackchain)
4577 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4578 MachinePointerInfo());
4579
4580 // Add extra space for alignment if needed.
4581 if (ExtraAlignSpace)
4582 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4583 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4584
4585 // Get the new stack pointer value.
4586 SDValue NewSP;
4587 if (hasInlineStackProbe(MF)) {
4588 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4589 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4590 Chain = NewSP.getValue(1);
4591 }
4592 else {
4593 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4594 // Copy the new stack pointer back.
4595 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4596 }
4597
4598 // The allocated data lives above the 160 bytes allocated for the standard
4599 // frame, plus any outgoing stack arguments. We don't know how much that
4600 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4601 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4602 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4603
4604 // Dynamically realign if needed.
4605 if (RequiredAlign > StackAlign) {
4606 Result =
4607 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4608 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4609 Result =
4610 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4611 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4612 }
4613
4614 if (StoreBackchain)
4615 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4616 MachinePointerInfo());
4617
4618 SDValue Ops[2] = { Result, Chain };
4619 return DAG.getMergeValues(Ops, DL);
4620}
4621
4622SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4623 SDValue Op, SelectionDAG &DAG) const {
4624 SDLoc DL(Op);
4625
4626 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4627}
4628
4629SDValue SystemZTargetLowering::lowerMULH(SDValue Op,
4630 SelectionDAG &DAG,
4631 unsigned Opcode) const {
4632 EVT VT = Op.getValueType();
4633 SDLoc DL(Op);
4634 SDValue Even, Odd;
4635
4636 // This custom expander is only used on z17 and later for 64-bit types.
4637 assert(!is32Bit(VT));
4638 assert(Subtarget.hasMiscellaneousExtensions2());
4639
4640 // SystemZISD::xMUL_LOHI returns the low result in the odd register and
4641 // the high result in the even register. Return the latter.
4642 lowerGR128Binary(DAG, DL, VT, Opcode,
4643 Op.getOperand(0), Op.getOperand(1), Even, Odd);
4644 return Even;
4645}
4646
4647SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4648 SelectionDAG &DAG) const {
4649 EVT VT = Op.getValueType();
4650 SDLoc DL(Op);
4651 SDValue Ops[2];
4652 if (is32Bit(VT))
4653 // Just do a normal 64-bit multiplication and extract the results.
4654 // We define this so that it can be used for constant division.
4655 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4656 Op.getOperand(1), Ops[1], Ops[0]);
4657 else if (Subtarget.hasMiscellaneousExtensions2())
4658 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4659 // the high result in the even register. ISD::SMUL_LOHI is defined to
4660 // return the low half first, so the results are in reverse order.
4661 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4662 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4663 else {
4664 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4665 //
4666 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4667 //
4668 // but using the fact that the upper halves are either all zeros
4669 // or all ones:
4670 //
4671 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4672 //
4673 // and grouping the right terms together since they are quicker than the
4674 // multiplication:
4675 //
4676 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
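    // A quick sanity check of that identity (values chosen for illustration):
    // for ll = -1 and rl = 2, UMUL_LOHI computes 0xffff...ffff * 2, giving
    // high = 1 and low = 0xffff...fffe. Here lh is all-ones and rh is zero,
    // so (lh & rl) + (ll & rh) == 2, and 1 - 2 == -1 is exactly the
    // sign-extended high half of the true signed product -2.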
4677 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4678 SDValue LL = Op.getOperand(0);
4679 SDValue RL = Op.getOperand(1);
4680 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4681 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4682 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4683 // the high result in the even register. ISD::SMUL_LOHI is defined to
4684 // return the low half first, so the results are in reverse order.
4685 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4686 LL, RL, Ops[1], Ops[0]);
4687 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4688 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4689 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4690 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4691 }
4692 return DAG.getMergeValues(Ops, DL);
4693}
4694
4695SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4696 SelectionDAG &DAG) const {
4697 EVT VT = Op.getValueType();
4698 SDLoc DL(Op);
4699 SDValue Ops[2];
4700 if (is32Bit(VT))
4701 // Just do a normal 64-bit multiplication and extract the results.
4702 // We define this so that it can be used for constant division.
4703 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4704 Op.getOperand(1), Ops[1], Ops[0]);
4705 else
4706 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4707 // the high result in the even register. ISD::UMUL_LOHI is defined to
4708 // return the low half first, so the results are in reverse order.
4709 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4710 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4711 return DAG.getMergeValues(Ops, DL);
4712}
4713
4714SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4715 SelectionDAG &DAG) const {
4716 SDValue Op0 = Op.getOperand(0);
4717 SDValue Op1 = Op.getOperand(1);
4718 EVT VT = Op.getValueType();
4719 SDLoc DL(Op);
4720
4721 // We use DSGF for 32-bit division. This means the first operand must
4722 // always be 64-bit, and the second operand should be 32-bit whenever
4723 // that is possible, to improve performance.
4724 if (is32Bit(VT))
4725 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4726 else if (DAG.ComputeNumSignBits(Op1) > 32)
4727 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4728
4729 // DSG(F) returns the remainder in the even register and the
4730 // quotient in the odd register.
4731 SDValue Ops[2];
4732 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4733 return DAG.getMergeValues(Ops, DL);
4734}
4735
4736SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4737 SelectionDAG &DAG) const {
4738 EVT VT = Op.getValueType();
4739 SDLoc DL(Op);
4740
4741 // DL(G) returns the remainder in the even register and the
4742 // quotient in the odd register.
4743 SDValue Ops[2];
4744 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4745 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4746 return DAG.getMergeValues(Ops, DL);
4747}
4748
4749SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4750 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4751
4752 // Get the known-zero masks for each operand.
4753 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4754 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4755 DAG.computeKnownBits(Ops[1])};
4756
4757 // See if the upper 32 bits of one operand and the lower 32 bits of the
4758 // other are known zero. They are the low and high operands respectively.
4759 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4760 Known[1].Zero.getZExtValue() };
4761 unsigned High, Low;
4762 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4763 High = 1, Low = 0;
4764 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4765 High = 0, Low = 1;
4766 else
4767 return Op;
4768
4769 SDValue LowOp = Ops[Low];
4770 SDValue HighOp = Ops[High];
4771
4772 // If the high part is a constant, we're better off using IILH.
4773 if (HighOp.getOpcode() == ISD::Constant)
4774 return Op;
4775
4776 // If the low part is a constant that is outside the range of LHI,
4777 // then we're better off using IILF.
4778 if (LowOp.getOpcode() == ISD::Constant) {
4779 int64_t Value = int32_t(LowOp->getAsZExtVal());
4780 if (!isInt<16>(Value))
4781 return Op;
4782 }
4783
4784 // Check whether the high part is an AND that doesn't change the
4785 // high 32 bits and just masks out low bits. We can skip it if so.
4786 if (HighOp.getOpcode() == ISD::AND &&
4787 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4788 SDValue HighOp0 = HighOp.getOperand(0);
4789 uint64_t Mask = HighOp.getConstantOperandVal(1);
4790 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4791 HighOp = HighOp0;
4792 }
4793
4794 // Take advantage of the fact that all GR32 operations only change the
4795 // low 32 bits by truncating Low to an i32 and inserting it directly
4796 // using a subreg. The interesting cases are those where the truncation
4797 // can be folded.
4798 SDLoc DL(Op);
4799 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4800 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4801 MVT::i64, HighOp, Low32);
4802}
4803
4804// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4805SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4806 SelectionDAG &DAG) const {
4807 SDNode *N = Op.getNode();
4808 SDValue LHS = N->getOperand(0);
4809 SDValue RHS = N->getOperand(1);
4810 SDLoc DL(N);
4811
4812 if (N->getValueType(0) == MVT::i128) {
4813 unsigned BaseOp = 0;
4814 unsigned FlagOp = 0;
4815 bool IsBorrow = false;
4816 switch (Op.getOpcode()) {
4817 default: llvm_unreachable("Unknown instruction!");
4818 case ISD::UADDO:
4819 BaseOp = ISD::ADD;
4820 FlagOp = SystemZISD::VACC;
4821 break;
4822 case ISD::USUBO:
4823 BaseOp = ISD::SUB;
4824 FlagOp = SystemZISD::VSCBI;
4825 IsBorrow = true;
4826 break;
4827 }
4828 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4829 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4830 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4831 DAG.getValueType(MVT::i1));
4832 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4833 if (IsBorrow)
4834 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4835 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4836 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4837 }
4838
4839 unsigned BaseOp = 0;
4840 unsigned CCValid = 0;
4841 unsigned CCMask = 0;
4842
4843 switch (Op.getOpcode()) {
4844 default: llvm_unreachable("Unknown instruction!");
4845 case ISD::SADDO:
4846 BaseOp = SystemZISD::SADDO;
4847 CCValid = SystemZ::CCMASK_ARITH;
4848 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4849 break;
4850 case ISD::SSUBO:
4851 BaseOp = SystemZISD::SSUBO;
4852 CCValid = SystemZ::CCMASK_ARITH;
4853 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4854 break;
4855 case ISD::UADDO:
4856 BaseOp = SystemZISD::UADDO;
4857 CCValid = SystemZ::CCMASK_LOGICAL;
4858 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4859 break;
4860 case ISD::USUBO:
4861 BaseOp = SystemZISD::USUBO;
4862 CCValid = SystemZ::CCMASK_LOGICAL;
4863 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4864 break;
4865 }
4866
4867 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4868 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4869
4870 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4871 if (N->getValueType(1) == MVT::i1)
4872 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4873
4874 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4875}
4876
4877static bool isAddCarryChain(SDValue Carry) {
4878 while (Carry.getOpcode() == ISD::UADDO_CARRY &&
4879 Carry->getValueType(0) != MVT::i128)
4880 Carry = Carry.getOperand(2);
4881 return Carry.getOpcode() == ISD::UADDO &&
4882 Carry->getValueType(0) != MVT::i128;
4883}
4884
4885static bool isSubBorrowChain(SDValue Carry) {
4886 while (Carry.getOpcode() == ISD::USUBO_CARRY &&
4887 Carry->getValueType(0) != MVT::i128)
4888 Carry = Carry.getOperand(2);
4889 return Carry.getOpcode() == ISD::USUBO &&
4890 Carry->getValueType(0) != MVT::i128;
4891}
4892
4893// Lower UADDO_CARRY/USUBO_CARRY nodes.
4894SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4895 SelectionDAG &DAG) const {
4896
4897 SDNode *N = Op.getNode();
4898 MVT VT = N->getSimpleValueType(0);
4899
4900 // Let legalize expand this if it isn't a legal type yet.
4901 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4902 return SDValue();
4903
4904 SDValue LHS = N->getOperand(0);
4905 SDValue RHS = N->getOperand(1);
4906 SDValue Carry = Op.getOperand(2);
4907 SDLoc DL(N);
4908
4909 if (VT == MVT::i128) {
4910 unsigned BaseOp = 0;
4911 unsigned FlagOp = 0;
4912 bool IsBorrow = false;
4913 switch (Op.getOpcode()) {
4914 default: llvm_unreachable("Unknown instruction!");
4915 case ISD::UADDO_CARRY:
4916 BaseOp = SystemZISD::VAC;
4917 FlagOp = SystemZISD::VACCC;
4918 break;
4919 case ISD::USUBO_CARRY:
4920 BaseOp = SystemZISD::VSBI;
4921 FlagOp = SystemZISD::VSBCBI;
4922 IsBorrow = true;
4923 break;
4924 }
4925 if (IsBorrow)
4926 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4927 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4928 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4929 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4930 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4931 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4932 DAG.getValueType(MVT::i1));
4933 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4934 if (IsBorrow)
4935 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4936 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4937 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4938 }
4939
4940 unsigned BaseOp = 0;
4941 unsigned CCValid = 0;
4942 unsigned CCMask = 0;
4943
4944 switch (Op.getOpcode()) {
4945 default: llvm_unreachable("Unknown instruction!");
4946 case ISD::UADDO_CARRY:
4947 if (!isAddCarryChain(Carry))
4948 return SDValue();
4949
4950 BaseOp = SystemZISD::ADDCARRY;
4951 CCValid = SystemZ::CCMASK_LOGICAL;
4952 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4953 break;
4954 case ISD::USUBO_CARRY:
4955 if (!isSubBorrowChain(Carry))
4956 return SDValue();
4957
4958 BaseOp = SystemZISD::SUBCARRY;
4959 CCValid = SystemZ::CCMASK_LOGICAL;
4960 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4961 break;
4962 }
4963
4964 // Set the condition code from the carry flag.
4965 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4966 DAG.getConstant(CCValid, DL, MVT::i32),
4967 DAG.getConstant(CCMask, DL, MVT::i32));
4968
4969 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4970 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4971
4972 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4973 if (N->getValueType(1) == MVT::i1)
4974 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4975
4976 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4977}
4978
4979SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4980 SelectionDAG &DAG) const {
4981 EVT VT = Op.getValueType();
4982 SDLoc DL(Op);
4983 Op = Op.getOperand(0);
4984
4985 if (VT.getScalarSizeInBits() == 128) {
4986 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4987 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4988 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4989 DAG.getConstant(0, DL, MVT::i64));
4990 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4991 return Op;
4992 }
4993
4994 // Handle vector types via VPOPCT.
4995 if (VT.isVector()) {
4996 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4997 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4998 switch (VT.getScalarSizeInBits()) {
4999 case 8:
5000 break;
5001 case 16: {
5002 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
5003 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
5004 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
5005 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
5006 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
5007 break;
5008 }
5009 case 32: {
5010 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
5011 DAG.getConstant(0, DL, MVT::i32));
5012 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
5013 break;
5014 }
5015 case 64: {
5016 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
5017 DAG.getConstant(0, DL, MVT::i32));
5018 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
5019 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
5020 break;
5021 }
5022 default:
5023 llvm_unreachable("Unexpected type");
5024 }
5025 return Op;
5026 }
5027
5028 // Get the known-zero mask for the operand.
5029 KnownBits Known = DAG.computeKnownBits(Op);
5030 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
5031 if (NumSignificantBits == 0)
5032 return DAG.getConstant(0, DL, VT);
5033
5034 // Skip known-zero high parts of the operand.
5035 int64_t OrigBitSize = VT.getSizeInBits();
5036 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
5037 BitSize = std::min(BitSize, OrigBitSize);
5038
5039 // The POPCNT instruction counts the number of bits in each byte.
5040 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
5041 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
5042 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
5043
5044 // Add up per-byte counts in a binary tree. All bits of Op at
5045 // position larger than BitSize remain zero throughout.
5046 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
5047 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
5048 if (BitSize != OrigBitSize)
5049 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
5050 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
5051 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
5052 }
5053
5054 // Extract overall result from high byte.
5055 if (BitSize > 8)
5056 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5057 DAG.getConstant(BitSize - 8, DL, VT));
5058
5059 return Op;
5060}
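// A worked example of the byte-tree reduction above (input value chosen
// for illustration): for the i32 value 0x01020304, POPCNT produces the
// per-byte counts 0x01010201; the I == 16 step turns this into 0x03020201,
// the I == 8 step into 0x05040301, and the final SRL by 24 extracts the
// total count 5 from the top byte.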
5061
5062SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
5063 SelectionDAG &DAG) const {
5064 SDLoc DL(Op);
5065 AtomicOrdering FenceOrdering =
5066 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5067 SyncScope::ID FenceSSID =
5068 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5069
5070 // The only fence that needs an instruction is a sequentially-consistent
5071 // cross-thread fence.
5072 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5073 FenceSSID == SyncScope::System) {
5074 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
5075 Op.getOperand(0)),
5076 0);
5077 }
5078
5079 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5080 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
5081}
5082
5083SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
5084 SelectionDAG &DAG) const {
5085 EVT RegVT = Op.getValueType();
5086 if (RegVT.getSizeInBits() == 128)
5087 return lowerATOMIC_LDST_I128(Op, DAG);
5088 return lowerLoadF16(Op, DAG);
5089}
5090
5091SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
5092 SelectionDAG &DAG) const {
5093 auto *Node = cast<AtomicSDNode>(Op.getNode());
5094 if (Node->getMemoryVT().getSizeInBits() == 128)
5095 return lowerATOMIC_LDST_I128(Op, DAG);
5096 return lowerStoreF16(Op, DAG);
5097}
5098
5099SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
5100 SelectionDAG &DAG) const {
5101 auto *Node = cast<AtomicSDNode>(Op.getNode());
5102 assert(
5103 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
5104 "Only custom lowering i128 or f128.");
5105 // Use same code to handle both legal and non-legal i128 types.
5106 SmallVector<SDValue, 2> Results;
5107 LowerOperationWrapper(Node, Results, DAG);
5108 return DAG.getMergeValues(Results, SDLoc(Op));
5109}
5110
5111// Prepare for a Compare And Swap for a subword operation. This needs to be
5112 // done in memory with 4 bytes at natural alignment.
5113 static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
5114 SDValue &AlignedAddr, SDValue &BitShift,
5115 SDValue &NegBitShift) {
5116 EVT PtrVT = Addr.getValueType();
5117 EVT WideVT = MVT::i32;
5118
5119 // Get the address of the containing word.
5120 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
5121 DAG.getSignedConstant(-4, DL, PtrVT));
5122
5123 // Get the number of bits that the word must be rotated left in order
5124 // to bring the field to the top bits of a GR32.
5125 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
5126 DAG.getConstant(3, DL, PtrVT));
5127 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
5128
5129 // Get the complementing shift amount, for rotating a field in the top
5130 // bits back to its proper position.
5131 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
5132 DAG.getConstant(0, DL, WideVT), BitShift);
5133
5134}
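// For example (address chosen for illustration only): a halfword at
// address 0x1006 lives in the word at AlignedAddr == 0x1004. BitShift is
// the address times 8; as a 32-bit rotate amount only its low bits matter,
// giving an effective left rotation of 16 that moves the halfword into the
// top 16 bits of the GR32, while NegBitShift rotates it back afterwards.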
5135
5136// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
5137// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
5138SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
5139 SelectionDAG &DAG,
5140 unsigned Opcode) const {
5141 auto *Node = cast<AtomicSDNode>(Op.getNode());
5142
5143 // 32-bit operations need no special handling.
5144 EVT NarrowVT = Node->getMemoryVT();
5145 EVT WideVT = MVT::i32;
5146 if (NarrowVT == WideVT)
5147 return Op;
5148
5149 int64_t BitSize = NarrowVT.getSizeInBits();
5150 SDValue ChainIn = Node->getChain();
5151 SDValue Addr = Node->getBasePtr();
5152 SDValue Src2 = Node->getVal();
5153 MachineMemOperand *MMO = Node->getMemOperand();
5154 SDLoc DL(Node);
5155
5156 // Convert atomic subtracts of constants into additions.
5157 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
5158 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
5159 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
5160 Src2 = DAG.getSignedConstant(-Const->getSExtValue(), DL,
5161 Src2.getValueType());
5162 }
5163
5164 SDValue AlignedAddr, BitShift, NegBitShift;
5165 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5166
5167 // Extend the source operand to 32 bits and prepare it for the inner loop.
5168 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
5169 // operations require the source to be shifted in advance. (This shift
5170 // can be folded if the source is constant.) For AND and NAND, the lower
5171 // bits must be set, while for other opcodes they should be left clear.
5172 if (Opcode != SystemZISD::ATOMIC_SWAPW)
5173 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
5174 DAG.getConstant(32 - BitSize, DL, WideVT));
5175 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
5176 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
5177 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
5178 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
5179
5180 // Construct the ATOMIC_LOADW_* node.
5181 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
5182 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
5183 DAG.getConstant(BitSize, DL, WideVT) };
5184 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
5185 NarrowVT, MMO);
5186
5187 // Rotate the result of the final CS so that the field is in the lower
5188 // bits of a GR32, then truncate it.
5189 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
5190 DAG.getConstant(BitSize, DL, WideVT));
5191 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
5192
5193 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
5194 return DAG.getMergeValues(RetOps, DL);
5195}
5196
5197// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
5198// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
5199SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
5200 SelectionDAG &DAG) const {
5201 auto *Node = cast<AtomicSDNode>(Op.getNode());
5202 EVT MemVT = Node->getMemoryVT();
5203 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
5204 // A full-width operation: negate and use LAA(G).
5205 assert(Op.getValueType() == MemVT && "Mismatched VTs");
5206 assert(Subtarget.hasInterlockedAccess1() &&
5207 "Should have been expanded by AtomicExpand pass.");
5208 SDValue Src2 = Node->getVal();
5209 SDLoc DL(Src2);
5210 SDValue NegSrc2 =
5211 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
5212 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
5213 Node->getChain(), Node->getBasePtr(), NegSrc2,
5214 Node->getMemOperand());
5215 }
5216
5217 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
5218}
5219
5220// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
5221SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
5222 SelectionDAG &DAG) const {
5223 auto *Node = cast<AtomicSDNode>(Op.getNode());
5224 SDValue ChainIn = Node->getOperand(0);
5225 SDValue Addr = Node->getOperand(1);
5226 SDValue CmpVal = Node->getOperand(2);
5227 SDValue SwapVal = Node->getOperand(3);
5228 MachineMemOperand *MMO = Node->getMemOperand();
5229 SDLoc DL(Node);
5230
5231 if (Node->getMemoryVT() == MVT::i128) {
5232 // Use same code to handle both legal and non-legal i128 types.
5233 SmallVector<SDValue, 2> Results;
5234 LowerOperationWrapper(Node, Results, DAG);
5235 return DAG.getMergeValues(Results, DL);
5236 }
5237
5238 // We have native support for 32-bit and 64-bit compare and swap, but we
5239 // still need to expand extracting the "success" result from the CC.
5240 EVT NarrowVT = Node->getMemoryVT();
5241 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
5242 if (NarrowVT == WideVT) {
5243 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5244 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
5245 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
5246 DL, Tys, Ops, NarrowVT, MMO);
5247 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5248 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
5249
5250 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
5251 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5252 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5253 return SDValue();
5254 }
5255
5256 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
5257 // via a fullword ATOMIC_CMP_SWAPW operation.
5258 int64_t BitSize = NarrowVT.getSizeInBits();
5259
5260 SDValue AlignedAddr, BitShift, NegBitShift;
5261 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5262
5263 // Construct the ATOMIC_CMP_SWAPW node.
5264 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5265 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
5266 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
5267 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
5268 VTList, Ops, NarrowVT, MMO);
5269 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5270 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
5271
5272 // emitAtomicCmpSwapW() will zero extend the result (original value).
5273 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
5274 DAG.getValueType(NarrowVT));
5275 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
5276 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5277 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5278 return SDValue();
5279}
5280
5281MachineMemOperand::Flags
5282SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
5283 // Because of how we convert atomic_load and atomic_store to normal loads and
5284 // stores in the DAG, we need to ensure that the MMOs are marked volatile
5285 // since DAGCombine hasn't been updated to account for atomic, but non
5286 // volatile loads. (See D57601)
5287 if (auto *SI = dyn_cast<StoreInst>(&I))
5288 if (SI->isAtomic())
5289 return MachineMemOperand::MOVolatile;
5290 if (auto *LI = dyn_cast<LoadInst>(&I))
5291 if (LI->isAtomic())
5292 return MachineMemOperand::MOVolatile;
5293 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
5294 if (AI->isAtomic())
5295 return MachineMemOperand::MOVolatile;
5296 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
5297 if (AI->isAtomic())
5298 return MachineMemOperand::MOVolatile;
5299 return MachineMemOperand::MONone;
5300}
5301
5302SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
5303 SelectionDAG &DAG) const {
5304 MachineFunction &MF = DAG.getMachineFunction();
5305 auto *Regs = Subtarget.getSpecialRegisters();
5306 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5307 report_fatal_error("Variable-sized stack allocations are not supported "
5308 "in GHC calling convention");
5309 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
5310 Regs->getStackPointerRegister(), Op.getValueType());
5311}
5312
5313SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
5314 SelectionDAG &DAG) const {
5315 MachineFunction &MF = DAG.getMachineFunction();
5316 auto *Regs = Subtarget.getSpecialRegisters();
5317 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
5318
5319 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5320 report_fatal_error("Variable-sized stack allocations are not supported "
5321 "in GHC calling convention");
5322
5323 SDValue Chain = Op.getOperand(0);
5324 SDValue NewSP = Op.getOperand(1);
5325 SDValue Backchain;
5326 SDLoc DL(Op);
5327
5328 if (StoreBackchain) {
5329 SDValue OldSP = DAG.getCopyFromReg(
5330 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
5331 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
5332 MachinePointerInfo());
5333 }
5334
5335 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
5336
5337 if (StoreBackchain)
5338 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
5339 MachinePointerInfo());
5340
5341 return Chain;
5342}
5343
5344SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
5345 SelectionDAG &DAG) const {
5346 bool IsData = Op.getConstantOperandVal(4);
5347 if (!IsData)
5348 // Just preserve the chain.
5349 return Op.getOperand(0);
5350
5351 SDLoc DL(Op);
5352 bool IsWrite = Op.getConstantOperandVal(2);
5353 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
5354 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
5355 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
5356 Op.getOperand(1)};
5357 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
5358 Node->getVTList(), Ops,
5359 Node->getMemoryVT(), Node->getMemOperand());
5360}
5361
5362SDValue
5363SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
5364 SelectionDAG &DAG) const {
5365 unsigned Opcode, CCValid;
5366 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
5367 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
5368 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
5369 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
5370 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
5371 return SDValue();
5372 }
5373
5374 return SDValue();
5375}
5376
5377SDValue
5378SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
5379 SelectionDAG &DAG) const {
5380 unsigned Opcode, CCValid;
5381 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
5382 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
5383 if (Op->getNumValues() == 1)
5384 return getCCResult(DAG, SDValue(Node, 0));
5385 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
5386 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
5387 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
5388 }
5389
5390 unsigned Id = Op.getConstantOperandVal(0);
5391 switch (Id) {
5392 case Intrinsic::thread_pointer:
5393 return lowerThreadPointer(SDLoc(Op), DAG);
5394
5395 case Intrinsic::s390_vpdi:
5396 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
5397 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5398
5399 case Intrinsic::s390_vperm:
5400 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
5401 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5402
5403 case Intrinsic::s390_vuphb:
5404 case Intrinsic::s390_vuphh:
5405 case Intrinsic::s390_vuphf:
5406 case Intrinsic::s390_vuphg:
5407 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
5408 Op.getOperand(1));
5409
5410 case Intrinsic::s390_vuplhb:
5411 case Intrinsic::s390_vuplhh:
5412 case Intrinsic::s390_vuplhf:
5413 case Intrinsic::s390_vuplhg:
5414 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
5415 Op.getOperand(1));
5416
5417 case Intrinsic::s390_vuplb:
5418 case Intrinsic::s390_vuplhw:
5419 case Intrinsic::s390_vuplf:
5420 case Intrinsic::s390_vuplg:
5421 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
5422 Op.getOperand(1));
5423
5424 case Intrinsic::s390_vupllb:
5425 case Intrinsic::s390_vupllh:
5426 case Intrinsic::s390_vupllf:
5427 case Intrinsic::s390_vupllg:
5428 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
5429 Op.getOperand(1));
5430
5431 case Intrinsic::s390_vsumb:
5432 case Intrinsic::s390_vsumh:
5433 case Intrinsic::s390_vsumgh:
5434 case Intrinsic::s390_vsumgf:
5435 case Intrinsic::s390_vsumqf:
5436 case Intrinsic::s390_vsumqg:
5437 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
5438 Op.getOperand(1), Op.getOperand(2));
5439
5440 case Intrinsic::s390_vaq:
5441 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5442 Op.getOperand(1), Op.getOperand(2));
5443 case Intrinsic::s390_vaccb:
5444 case Intrinsic::s390_vacch:
5445 case Intrinsic::s390_vaccf:
5446 case Intrinsic::s390_vaccg:
5447 case Intrinsic::s390_vaccq:
5448 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
5449 Op.getOperand(1), Op.getOperand(2));
5450 case Intrinsic::s390_vacq:
5451 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
5452 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5453 case Intrinsic::s390_vacccq:
5454 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
5455 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5456
5457 case Intrinsic::s390_vsq:
5458 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
5459 Op.getOperand(1), Op.getOperand(2));
5460 case Intrinsic::s390_vscbib:
5461 case Intrinsic::s390_vscbih:
5462 case Intrinsic::s390_vscbif:
5463 case Intrinsic::s390_vscbig:
5464 case Intrinsic::s390_vscbiq:
5465 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
5466 Op.getOperand(1), Op.getOperand(2));
5467 case Intrinsic::s390_vsbiq:
5468 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
5469 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5470 case Intrinsic::s390_vsbcbiq:
5471 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
5472 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5473
5474 case Intrinsic::s390_vmhb:
5475 case Intrinsic::s390_vmhh:
5476 case Intrinsic::s390_vmhf:
5477 case Intrinsic::s390_vmhg:
5478 case Intrinsic::s390_vmhq:
5479 return DAG.getNode(ISD::MULHS, SDLoc(Op), Op.getValueType(),
5480 Op.getOperand(1), Op.getOperand(2));
5481 case Intrinsic::s390_vmlhb:
5482 case Intrinsic::s390_vmlhh:
5483 case Intrinsic::s390_vmlhf:
5484 case Intrinsic::s390_vmlhg:
5485 case Intrinsic::s390_vmlhq:
5486 return DAG.getNode(ISD::MULHU, SDLoc(Op), Op.getValueType(),
5487 Op.getOperand(1), Op.getOperand(2));
5488
5489 case Intrinsic::s390_vmahb:
5490 case Intrinsic::s390_vmahh:
5491 case Intrinsic::s390_vmahf:
5492 case Intrinsic::s390_vmahg:
5493 case Intrinsic::s390_vmahq:
5494 return DAG.getNode(SystemZISD::VMAH, SDLoc(Op), Op.getValueType(),
5495 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5496 case Intrinsic::s390_vmalhb:
5497 case Intrinsic::s390_vmalhh:
5498 case Intrinsic::s390_vmalhf:
5499 case Intrinsic::s390_vmalhg:
5500 case Intrinsic::s390_vmalhq:
5501 return DAG.getNode(SystemZISD::VMALH, SDLoc(Op), Op.getValueType(),
5502 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5503
5504 case Intrinsic::s390_vmeb:
5505 case Intrinsic::s390_vmeh:
5506 case Intrinsic::s390_vmef:
5507 case Intrinsic::s390_vmeg:
5508 return DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5509 Op.getOperand(1), Op.getOperand(2));
5510 case Intrinsic::s390_vmleb:
5511 case Intrinsic::s390_vmleh:
5512 case Intrinsic::s390_vmlef:
5513 case Intrinsic::s390_vmleg:
5514 return DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5515 Op.getOperand(1), Op.getOperand(2));
5516 case Intrinsic::s390_vmob:
5517 case Intrinsic::s390_vmoh:
5518 case Intrinsic::s390_vmof:
5519 case Intrinsic::s390_vmog:
5520 return DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5521 Op.getOperand(1), Op.getOperand(2));
5522 case Intrinsic::s390_vmlob:
5523 case Intrinsic::s390_vmloh:
5524 case Intrinsic::s390_vmlof:
5525 case Intrinsic::s390_vmlog:
5526 return DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5527 Op.getOperand(1), Op.getOperand(2));
5528
5529 case Intrinsic::s390_vmaeb:
5530 case Intrinsic::s390_vmaeh:
5531 case Intrinsic::s390_vmaef:
5532 case Intrinsic::s390_vmaeg:
5533 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5534 DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5535 Op.getOperand(1), Op.getOperand(2)),
5536 Op.getOperand(3));
5537 case Intrinsic::s390_vmaleb:
5538 case Intrinsic::s390_vmaleh:
5539 case Intrinsic::s390_vmalef:
5540 case Intrinsic::s390_vmaleg:
5541 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5542 DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5543 Op.getOperand(1), Op.getOperand(2)),
5544 Op.getOperand(3));
5545 case Intrinsic::s390_vmaob:
5546 case Intrinsic::s390_vmaoh:
5547 case Intrinsic::s390_vmaof:
5548 case Intrinsic::s390_vmaog:
5549 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5550 DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5551 Op.getOperand(1), Op.getOperand(2)),
5552 Op.getOperand(3));
5553 case Intrinsic::s390_vmalob:
5554 case Intrinsic::s390_vmaloh:
5555 case Intrinsic::s390_vmalof:
5556 case Intrinsic::s390_vmalog:
5557 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5558 DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5559 Op.getOperand(1), Op.getOperand(2)),
5560 Op.getOperand(3));
5561 }
5562
5563 return SDValue();
5564}
5565
5566namespace {
5567// Says that SystemZISD operation Opcode can be used to perform the equivalent
5568// of a VPERM with permute vector Bytes. If Opcode takes three operands,
5569// Operand is the constant third operand, otherwise it is the number of
5570// bytes in each element of the result.
5571struct Permute {
5572 unsigned Opcode;
5573 unsigned Operand;
5574 unsigned char Bytes[SystemZ::VectorBytes];
5575};
5576}
5577
5578static const Permute PermuteForms[] = {
5579 // VMRHG
5580 { SystemZISD::MERGE_HIGH, 8,
5581 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
5582 // VMRHF
5583 { SystemZISD::MERGE_HIGH, 4,
5584 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
5585 // VMRHH
5586 { SystemZISD::MERGE_HIGH, 2,
5587 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
5588 // VMRHB
5589 { SystemZISD::MERGE_HIGH, 1,
5590 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
5591 // VMRLG
5592 { SystemZISD::MERGE_LOW, 8,
5593 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
5594 // VMRLF
5595 { SystemZISD::MERGE_LOW, 4,
5596 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
5597 // VMRLH
5598 { SystemZISD::MERGE_LOW, 2,
5599 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
5600 // VMRLB
5601 { SystemZISD::MERGE_LOW, 1,
5602 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
5603 // VPKG
5604 { SystemZISD::PACK, 4,
5605 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
5606 // VPKF
5607 { SystemZISD::PACK, 2,
5608 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
5609 // VPKH
5610 { SystemZISD::PACK, 1,
5611 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
5612 // VPDI V1, V2, 4 (low half of V1, high half of V2)
5613 { SystemZISD::PERMUTE_DWORDS, 4,
5614 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
5615 // VPDI V1, V2, 1 (high half of V1, low half of V2)
5616 { SystemZISD::PERMUTE_DWORDS, 1,
5617 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
5618};
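// In each entry above, byte indices 0..15 select bytes of the first operand
// and 16..31 select bytes of the second operand, exactly as in a VPERM mask.
// For example, the VMRHG entry { 0..7, 16..23 } builds its result from the
// high doubleword of operand 0 followed by the high doubleword of operand 1.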
5619
5620// Called after matching a vector shuffle against a particular pattern.
5621// Both the original shuffle and the pattern have two vector operands.
5622// OpNos[0] is the operand of the original shuffle that should be used for
5623// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
5624// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
5625// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
5626// for operands 0 and 1 of the pattern.
5627static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
5628 if (OpNos[0] < 0) {
5629 if (OpNos[1] < 0)
5630 return false;
5631 OpNo0 = OpNo1 = OpNos[1];
5632 } else if (OpNos[1] < 0) {
5633 OpNo0 = OpNo1 = OpNos[0];
5634 } else {
5635 OpNo0 = OpNos[0];
5636 OpNo1 = OpNos[1];
5637 }
5638 return true;
5639}
5640
5641// Bytes is a VPERM-like permute vector, except that -1 is used for
5642// undefined bytes. Return true if the VPERM can be implemented using P.
5643// When returning true set OpNo0 to the VPERM operand that should be
5644// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5645//
5646// For example, if swapping the VPERM operands allows P to match, OpNo0
5647// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5648// operand, but rewriting it to use two duplicated operands allows it to
5649// match P, then OpNo0 and OpNo1 will be the same.
5650static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5651 unsigned &OpNo0, unsigned &OpNo1) {
5652 int OpNos[] = { -1, -1 };
5653 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5654 int Elt = Bytes[I];
5655 if (Elt >= 0) {
5656 // Make sure that the two permute vectors use the same suboperand
5657 // byte number. Only the operand numbers (the high bits) are
5658 // allowed to differ.
5659 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5660 return false;
5661 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5662 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5663 // Make sure that the operand mappings are consistent with previous
5664 // elements.
5665 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5666 return false;
5667 OpNos[ModelOpNo] = RealOpNo;
5668 }
5669 }
5670 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5671}
5672
5673// As above, but search for a matching permute.
5674static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5675 unsigned &OpNo0, unsigned &OpNo1) {
5676 for (auto &P : PermuteForms)
5677 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5678 return &P;
5679 return nullptr;
5680}
5681
5682// Bytes is a VPERM-like permute vector, except that -1 is used for
5683// undefined bytes. This permute is an operand of an outer permute.
5684// See whether redistributing the -1 bytes gives a shuffle that can be
5685// implemented using P. If so, set Transform to a VPERM-like permute vector
5686// that, when applied to the result of P, gives the original permute in Bytes.
5687static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5688 const Permute &P,
5689 SmallVectorImpl<int> &Transform) {
5690 unsigned To = 0;
5691 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5692 int Elt = Bytes[From];
5693 if (Elt < 0)
5694 // Byte number From of the result is undefined.
5695 Transform[From] = -1;
5696 else {
5697 while (P.Bytes[To] != Elt) {
5698 To += 1;
5699 if (To == SystemZ::VectorBytes)
5700 return false;
5701 }
5702 Transform[From] = To;
5703 }
5704 }
5705 return true;
5706}
5707
5708// As above, but search for a matching permute.
5709static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5710 SmallVectorImpl<int> &Transform) {
5711 for (auto &P : PermuteForms)
5712 if (matchDoublePermute(Bytes, P, Transform))
5713 return &P;
5714 return nullptr;
5715}
5716
5717// Convert the mask of the given shuffle op into a byte-level mask,
5718// as if it had type vNi8.
5719static bool getVPermMask(SDValue ShuffleOp,
5720 SmallVectorImpl<int> &Bytes) {
5721 EVT VT = ShuffleOp.getValueType();
5722 unsigned NumElements = VT.getVectorNumElements();
5723 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5724
5725 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5726 Bytes.resize(NumElements * BytesPerElement, -1);
5727 for (unsigned I = 0; I < NumElements; ++I) {
5728 int Index = VSN->getMaskElt(I);
5729 if (Index >= 0)
5730 for (unsigned J = 0; J < BytesPerElement; ++J)
5731 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5732 }
5733 return true;
5734 }
5735 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5736 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5737 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5738 Bytes.resize(NumElements * BytesPerElement, -1);
5739 for (unsigned I = 0; I < NumElements; ++I)
5740 for (unsigned J = 0; J < BytesPerElement; ++J)
5741 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5742 return true;
5743 }
5744 return false;
5745}
5746
5747// Bytes is a VPERM-like permute vector, except that -1 is used for
5748// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5749// the result come from a contiguous sequence of bytes from one input.
5750// Set Base to the selector for the first byte if so.
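// For example, with BytesPerElement == 4 and Start == 0, a Bytes prefix of
// { 20, 21, 22, 23, ... } is a contiguous run within the second operand and
// sets Base to 20, while a prefix of { 20, 21, 22, 25, ... } fails.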
5751static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5752 unsigned BytesPerElement, int &Base) {
5753 Base = -1;
5754 for (unsigned I = 0; I < BytesPerElement; ++I) {
5755 if (Bytes[Start + I] >= 0) {
5756 unsigned Elem = Bytes[Start + I];
5757 if (Base < 0) {
5758 Base = Elem - I;
5759 // Make sure the bytes would come from one input operand.
5760 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5761 return false;
5762 } else if (unsigned(Base) != Elem - I)
5763 return false;
5764 }
5765 }
5766 return true;
5767}
5768
5769// Bytes is a VPERM-like permute vector, except that -1 is used for
5770// undefined bytes. Return true if it can be performed using VSLDB.
5771// When returning true, set StartIndex to the shift amount and OpNo0
5772// and OpNo1 to the VPERM operands that should be used as the first
5773// and second shift operand respectively.
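// For example, Bytes = { 2, 3, ..., 17 } (entry I equal to I + 2) matches
// with StartIndex == 2, operand 0 supplying result bytes 0..13 and operand 1
// supplying bytes 14..15, which is exactly a VSLDB with shift amount 2.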
5774static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5775 unsigned &StartIndex, unsigned &OpNo0,
5776 unsigned &OpNo1) {
5777 int OpNos[] = { -1, -1 };
5778 int Shift = -1;
5779 for (unsigned I = 0; I < 16; ++I) {
5780 int Index = Bytes[I];
5781 if (Index >= 0) {
5782 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5783 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5784 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5785 if (Shift < 0)
5786 Shift = ExpectedShift;
5787 else if (Shift != ExpectedShift)
5788 return false;
5789 // Make sure that the operand mappings are consistent with previous
5790 // elements.
5791 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5792 return false;
5793 OpNos[ModelOpNo] = RealOpNo;
5794 }
5795 }
5796 StartIndex = Shift;
5797 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5798}
5799
5800// Create a node that performs P on operands Op0 and Op1, casting the
5801// operands to the appropriate type. The type of the result is determined by P.
5802static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5803 const Permute &P, SDValue Op0, SDValue Op1) {
5804 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5805 // elements of a PACK are twice as wide as the outputs.
5806 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5807 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5808 P.Operand);
5809 // Cast both operands to the appropriate type.
5810 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5811 SystemZ::VectorBytes / InBytes);
5812 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5813 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5814 SDValue Op;
5815 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5816 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5817 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5818 } else if (P.Opcode == SystemZISD::PACK) {
5819 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5820 SystemZ::VectorBytes / P.Operand);
5821 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5822 } else {
5823 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5824 }
5825 return Op;
5826}
5827
5828static bool isZeroVector(SDValue N) {
5829 if (N->getOpcode() == ISD::BITCAST)
5830 N = N->getOperand(0);
5831 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5832 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5833 return Op->getZExtValue() == 0;
5834 return ISD::isBuildVectorAllZeros(N.getNode());
5835}
5836
5837// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5838static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5839 for (unsigned I = 0; I < Num ; I++)
5840 if (isZeroVector(Ops[I]))
5841 return I;
5842 return UINT32_MAX;
5843}
5844
5845// Bytes is a VPERM-like permute vector, except that -1 is used for
5846// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5847// VSLDB or VPERM.
5848static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5849 SDValue *Ops,
5850 const SmallVectorImpl<int> &Bytes) {
5851 for (unsigned I = 0; I < 2; ++I)
5852 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5853
5854 // First see whether VSLDB can be used.
5855 unsigned StartIndex, OpNo0, OpNo1;
5856 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5857 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5858 Ops[OpNo1],
5859 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5860
5861 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5862 // eliminate a zero vector by reusing any zero index in the permute vector.
5863 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5864 if (ZeroVecIdx != UINT32_MAX) {
5865 bool MaskFirst = true;
5866 int ZeroIdx = -1;
5867 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5868 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5869 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5870 if (OpNo == ZeroVecIdx && I == 0) {
5871 // If the first byte is zero, use mask as first operand.
5872 ZeroIdx = 0;
5873 break;
5874 }
5875 if (OpNo != ZeroVecIdx && Byte == 0) {
5876 // If mask contains a zero, use it by placing that vector first.
5877 ZeroIdx = I + SystemZ::VectorBytes;
5878 MaskFirst = false;
5879 break;
5880 }
5881 }
5882 if (ZeroIdx != -1) {
5883 SDValue IndexNodes[SystemZ::VectorBytes];
5884 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5885 if (Bytes[I] >= 0) {
5886 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5887 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5888 if (OpNo == ZeroVecIdx)
5889 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5890 else {
5891 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5892 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5893 }
5894 } else
5895 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5896 }
5897 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5898 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5899 if (MaskFirst)
5900 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5901 Mask);
5902 else
5903 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5904 Mask);
5905 }
5906 }
5907
5908 SDValue IndexNodes[SystemZ::VectorBytes];
5909 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5910 if (Bytes[I] >= 0)
5911 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5912 else
5913 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5914 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5915 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5916 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5917}
5918
5919namespace {
5920// Describes a general N-operand vector shuffle.
5921struct GeneralShuffle {
5922 GeneralShuffle(EVT vt)
5923 : VT(vt), UnpackFromEltSize(UINT_MAX), UnpackLow(false) {}
5924 void addUndef();
5925 bool add(SDValue, unsigned);
5926 SDValue getNode(SelectionDAG &, const SDLoc &);
5927 void tryPrepareForUnpack();
5928 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5929 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5930
5931 // The operands of the shuffle.
5932 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5933
5934 // Index I is -1 if byte I of the result is undefined. Otherwise the
5935 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5936 // Bytes[I] / SystemZ::VectorBytes.
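 // For example, Bytes[0] == 19 means that byte 0 of the result is taken
 // from byte 3 (19 % 16) of shuffle operand 1 (19 / 16).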
5937 SmallVector<int, SystemZ::VectorBytes> Bytes;
5938
5939 // The type of the shuffle result.
5940 EVT VT;
5941
5942 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5943 unsigned UnpackFromEltSize;
5944 // True if the final unpack uses the low half.
5945 bool UnpackLow;
5946};
5947} // namespace
5948
5949// Add an extra undefined element to the shuffle.
5950void GeneralShuffle::addUndef() {
5951 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5952 for (unsigned I = 0; I < BytesPerElement; ++I)
5953 Bytes.push_back(-1);
5954}
5955
5956// Add an extra element to the shuffle, taking it from element Elem of Op.
5957// A null Op indicates a vector input whose value will be calculated later;
5958// there is at most one such input per shuffle and it always has the same
5959// type as the result. Aborts and returns false if the source vector elements
5960// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
5961// LLVM they become implicitly extended, but this is rare and not optimized.
5962bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5963 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5964
5965 // The source vector can have wider elements than the result,
5966 // either through an explicit TRUNCATE or because of type legalization.
5967 // We want the least significant part.
5968 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5969 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5970
5971 // Return false if the source elements are smaller than their destination
5972 // elements.
5973 if (FromBytesPerElement < BytesPerElement)
5974 return false;
5975
5976 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5977 (FromBytesPerElement - BytesPerElement));
5978
5979 // Look through things like shuffles and bitcasts.
5980 while (Op.getNode()) {
5981 if (Op.getOpcode() == ISD::BITCAST)
5982 Op = Op.getOperand(0);
5983 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5984 // See whether the bytes we need come from a contiguous part of one
5985 // operand.
5986 SmallVector<int, SystemZ::VectorBytes> OpBytes;
5987 if (!getVPermMask(Op, OpBytes))
5988 break;
5989 int NewByte;
5990 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5991 break;
5992 if (NewByte < 0) {
5993 addUndef();
5994 return true;
5995 }
5996 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
5997 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
5998 } else if (Op.isUndef()) {
5999 addUndef();
6000 return true;
6001 } else
6002 break;
6003 }
6004
6005 // Make sure that the source of the extraction is in Ops.
6006 unsigned OpNo = 0;
6007 for (; OpNo < Ops.size(); ++OpNo)
6008 if (Ops[OpNo] == Op)
6009 break;
6010 if (OpNo == Ops.size())
6011 Ops.push_back(Op);
6012
6013 // Add the element to Bytes.
6014 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
6015 for (unsigned I = 0; I < BytesPerElement; ++I)
6016 Bytes.push_back(Base + I);
6017
6018 return true;
6019}
6020
6021// Return SDNodes for the completed shuffle.
6022SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
6023 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
6024
6025 if (Ops.size() == 0)
6026 return DAG.getUNDEF(VT);
6027
6028 // Use a single unpack if possible as the last operation.
6029 tryPrepareForUnpack();
6030
6031 // Make sure that there are at least two shuffle operands.
6032 if (Ops.size() == 1)
6033 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
6034
6035 // Create a tree of shuffles, deferring root node until after the loop.
6036 // Try to redistribute the undefined elements of non-root nodes so that
6037 // the non-root shuffles match something like a pack or merge, then adjust
6038 // the parent node's permute vector to compensate for the new order.
6039 // Among other things, this copes with vectors like <2 x i16> that were
6040 // padded with undefined elements during type legalization.
6041 //
6042 // In the best case this redistribution will lead to the whole tree
6043 // using packs and merges. It should rarely be a loss in other cases.
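 // For example, with four operands the first pass combines Ops[0]/Ops[1]
 // into Ops[0] and Ops[2]/Ops[3] into Ops[2]; the root shuffle emitted
 // after the loop then combines those two intermediate results.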
6044 unsigned Stride = 1;
6045 for (; Stride * 2 < Ops.size(); Stride *= 2) {
6046 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
6047 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
6048
6049 // Create a mask for just these two operands.
6050 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
6051 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6052 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
6053 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
6054 if (OpNo == I)
6055 NewBytes[J] = Byte;
6056 else if (OpNo == I + Stride)
6057 NewBytes[J] = SystemZ::VectorBytes + Byte;
6058 else
6059 NewBytes[J] = -1;
6060 }
6061 // See if it would be better to reorganize NewMask to avoid using VPERM.
6062 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
6063 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
6064 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
6065 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
6066 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6067 if (NewBytes[J] >= 0) {
6068 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
6069 "Invalid double permute");
6070 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
6071 } else
6072 assert(NewBytesMap[J] < 0 && "Invalid double permute");
6073 }
6074 } else {
6075 // Just use NewBytes on the operands.
6076 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
6077 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
6078 if (NewBytes[J] >= 0)
6079 Bytes[J] = I * SystemZ::VectorBytes + J;
6080 }
6081 }
6082 }
6083
6084 // Now we just have 2 inputs. Put the second operand in Ops[1].
6085 if (Stride > 1) {
6086 Ops[1] = Ops[Stride];
6087 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6088 if (Bytes[I] >= int(SystemZ::VectorBytes))
6089 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
6090 }
6091
6092 // Look for an instruction that can do the permute without resorting
6093 // to VPERM.
6094 unsigned OpNo0, OpNo1;
6095 SDValue Op;
6096 if (unpackWasPrepared() && Ops[1].isUndef())
6097 Op = Ops[0];
6098 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
6099 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
6100 else
6101 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
6102
6103 Op = insertUnpackIfPrepared(DAG, DL, Op);
6104
6105 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6106}
6107
6108#ifndef NDEBUG
6109static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
6110 dbgs() << Msg.c_str() << " { ";
6111 for (unsigned I = 0; I < Bytes.size(); I++)
6112 dbgs() << Bytes[I] << " ";
6113 dbgs() << "}\n";
6114}
6115#endif
6116
6117// If the Bytes vector matches an unpack operation, prepare to do the unpack
6118// after all else by removing the zero vector and the effect of the unpack on
6119// Bytes.
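// A zero-extending unpack (UNPACKL_HIGH / UNPACKL_LOW) doubles the element
// width by interleaving zero bytes with the source bytes, so this only
// applies when every result byte in one of those zero positions comes from
// the zero vector in Ops; that operand can then be dropped and the unpack
// emitted as the final operation.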
6120void GeneralShuffle::tryPrepareForUnpack() {
6121 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
6122 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
6123 return;
6124
6125 // Only do this if removing the zero vector reduces the depth, otherwise
6126 // the critical path will increase with the final unpack.
6127 if (Ops.size() > 2 &&
6128 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
6129 return;
6130
6131 // Find an unpack that would allow removing the zero vector from Ops.
6132 UnpackFromEltSize = 1;
6133 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
6134 bool MatchUnpack = true;
6135 SmallVector<int, SystemZ::VectorBytes> SrcBytes;
6136 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
6137 unsigned ToEltSize = UnpackFromEltSize * 2;
6138 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
6139 if (!IsZextByte)
6140 SrcBytes.push_back(Bytes[Elt]);
6141 if (Bytes[Elt] != -1) {
6142 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
6143 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
6144 MatchUnpack = false;
6145 break;
6146 }
6147 }
6148 }
6149 if (MatchUnpack) {
6150 if (Ops.size() == 2) {
6151 // Don't use unpack if a single source operand needs rearrangement.
6152 bool CanUseUnpackLow = true, CanUseUnpackHigh = true;
6153 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) {
6154 if (SrcBytes[i] == -1)
6155 continue;
6156 if (SrcBytes[i] % 16 != int(i))
6157 CanUseUnpackHigh = false;
6158 if (SrcBytes[i] % 16 != int(i + SystemZ::VectorBytes / 2))
6159 CanUseUnpackLow = false;
6160 if (!CanUseUnpackLow && !CanUseUnpackHigh) {
6161 UnpackFromEltSize = UINT_MAX;
6162 return;
6163 }
6164 }
6165 if (!CanUseUnpackHigh)
6166 UnpackLow = true;
6167 }
6168 break;
6169 }
6170 }
6171 if (UnpackFromEltSize > 4)
6172 return;
6173
6174 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
6175 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
6176 << ".\n";
6177 dumpBytes(Bytes, "Original Bytes vector:"););
6178
6179 // Apply the unpack in reverse to the Bytes array.
6180 unsigned B = 0;
6181 if (UnpackLow) {
6182 while (B < SystemZ::VectorBytes / 2)
6183 Bytes[B++] = -1;
6184 }
6185 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
6186 Elt += UnpackFromEltSize;
6187 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
6188 Bytes[B] = Bytes[Elt];
6189 }
6190 if (!UnpackLow) {
6191 while (B < SystemZ::VectorBytes)
6192 Bytes[B++] = -1;
6193 }
6194
6195 // Remove the zero vector from Ops
6196 Ops.erase(&Ops[ZeroVecOpNo]);
6197 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6198 if (Bytes[I] >= 0) {
6199 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
6200 if (OpNo > ZeroVecOpNo)
6201 Bytes[I] -= SystemZ::VectorBytes;
6202 }
6203
6204 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
6205 dbgs() << "\n";);
6206}
6207
6208SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
6209 const SDLoc &DL,
6210 SDValue Op) {
6211 if (!unpackWasPrepared())
6212 return Op;
6213 unsigned InBits = UnpackFromEltSize * 8;
6214 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
6215 SystemZ::VectorBits / InBits);
6216 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
6217 unsigned OutBits = InBits * 2;
6218 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
6219 SystemZ::VectorBits / OutBits);
6220 return DAG.getNode(UnpackLow ? SystemZISD::UNPACKL_LOW
6221 : SystemZISD::UNPACKL_HIGH,
6222 DL, OutVT, PackedOp);
6223}
6224
6225// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
6226static bool isScalarToVector(SDValue Op) {
6227 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
6228 if (!Op.getOperand(I).isUndef())
6229 return false;
6230 return true;
6231}
6232
6233// Return a vector of type VT that contains Value in the first element.
6234// The other elements don't matter.
6235static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6236 SDValue Value) {
6237 // If we have a constant, replicate it to all elements and let the
6238 // BUILD_VECTOR lowering take care of it.
6239 if (Value.getOpcode() == ISD::Constant ||
6240 Value.getOpcode() == ISD::ConstantFP) {
6241 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
6242 return DAG.getBuildVector(VT, DL, Ops);
6243 }
6244 if (Value.isUndef())
6245 return DAG.getUNDEF(VT);
6246 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
6247}
6248
6249// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
6250// element 1. Used for cases in which replication is cheap.
6251static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6252 SDValue Op0, SDValue Op1) {
6253 if (Op0.isUndef()) {
6254 if (Op1.isUndef())
6255 return DAG.getUNDEF(VT);
6256 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
6257 }
6258 if (Op1.isUndef())
6259 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
6260 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
6261 buildScalarToVector(DAG, DL, VT, Op0),
6262 buildScalarToVector(DAG, DL, VT, Op1));
6263}
6264
6265// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
6266// vector for them.
6267static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
6268 SDValue Op1) {
6269 if (Op0.isUndef() && Op1.isUndef())
6270 return DAG.getUNDEF(MVT::v2i64);
6271 // If one of the two inputs is undefined then replicate the other one,
6272 // in order to avoid using another register unnecessarily.
6273 if (Op0.isUndef())
6274 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6275 else if (Op1.isUndef())
6276 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6277 else {
6278 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6279 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6280 }
6281 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
6282}
6283
6284// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
6285// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
6286// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
6287// would benefit from this representation and return it if so.
6288static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
6289 BuildVectorSDNode *BVN) {
6290 EVT VT = BVN->getValueType(0);
6291 unsigned NumElements = VT.getVectorNumElements();
6292
6293 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
6294 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
6295 // need a BUILD_VECTOR, add an additional placeholder operand for that
6296 // BUILD_VECTOR and store its operands in ResidueOps.
6297 GeneralShuffle GS(VT);
6298 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
6299 bool FoundOne = false;
6300 for (unsigned I = 0; I < NumElements; ++I) {
6301 SDValue Op = BVN->getOperand(I);
6302 if (Op.getOpcode() == ISD::TRUNCATE)
6303 Op = Op.getOperand(0);
6304 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6305 Op.getOperand(1).getOpcode() == ISD::Constant) {
6306 unsigned Elem = Op.getConstantOperandVal(1);
6307 if (!GS.add(Op.getOperand(0), Elem))
6308 return SDValue();
6309 FoundOne = true;
6310 } else if (Op.isUndef()) {
6311 GS.addUndef();
6312 } else {
6313 if (!GS.add(SDValue(), ResidueOps.size()))
6314 return SDValue();
6315 ResidueOps.push_back(BVN->getOperand(I));
6316 }
6317 }
6318
6319 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
6320 if (!FoundOne)
6321 return SDValue();
6322
6323 // Create the BUILD_VECTOR for the remaining elements, if any.
6324 if (!ResidueOps.empty()) {
6325 while (ResidueOps.size() < NumElements)
6326 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
6327 for (auto &Op : GS.Ops) {
6328 if (!Op.getNode()) {
6329 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
6330 break;
6331 }
6332 }
6333 }
6334 return GS.getNode(DAG, SDLoc(BVN));
6335}
6336
6337bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
6338 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
6339 return true;
6340 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
6341 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
6342 return true;
6343 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
6344 return true;
6345 return false;
6346}
6347
6348// Combine GPR scalar values Elems into a vector of type VT.
6349SDValue
6350SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6351 SmallVectorImpl<SDValue> &Elems) const {
6352 // See whether there is a single replicated value.
6353 SDValue Single;
6354 unsigned int NumElements = Elems.size();
6355 unsigned int Count = 0;
6356 for (auto Elem : Elems) {
6357 if (!Elem.isUndef()) {
6358 if (!Single.getNode())
6359 Single = Elem;
6360 else if (Elem != Single) {
6361 Single = SDValue();
6362 break;
6363 }
6364 Count += 1;
6365 }
6366 }
6367 // There are three cases here:
6368 //
6369 // - if the only defined element is a loaded one, the best sequence
6370 // is a replicating load.
6371 //
6372 // - otherwise, if the only defined element is an i64 value, we will
6373 // end up with the same VLVGP sequence regardless of whether we short-cut
6374 // for replication or fall through to the later code.
6375 //
6376 // - otherwise, if the only defined element is an i32 or smaller value,
6377 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
6378 // This is only a win if the single defined element is used more than once.
6379 // In other cases we're better off using a single VLVGx.
6380 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
6381 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
6382
6383 // If all elements are loads, use VLREP/VLEs (below).
6384 bool AllLoads = true;
6385 for (auto Elem : Elems)
6386 if (!isVectorElementLoad(Elem)) {
6387 AllLoads = false;
6388 break;
6389 }
6390
6391 // The best way of building a v2i64 from two i64s is to use VLVGP.
6392 if (VT == MVT::v2i64 && !AllLoads)
6393 return joinDwords(DAG, DL, Elems[0], Elems[1]);
6394
6395 // Use a 64-bit merge high to combine two doubles.
6396 if (VT == MVT::v2f64 && !AllLoads)
6397 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6398
6399 // Build v4f32 values directly from the FPRs:
6400 //
6401 // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
6402 // V V VMRHF
6403 // <ABxx> <CDxx>
6404 // V VMRHG
6405 // <ABCD>
6406 if (VT == MVT::v4f32 && !AllLoads) {
6407 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6408 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
6409 // Avoid unnecessary undefs by reusing the other operand.
6410 if (Op01.isUndef())
6411 Op01 = Op23;
6412 else if (Op23.isUndef())
6413 Op23 = Op01;
6414 // Merging identical replications is a no-op.
6415 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
6416 return Op01;
6417 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
6418 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
6419 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
6420 DL, MVT::v2i64, Op01, Op23);
6421 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6422 }
6423
6424 // Collect the constant terms.
6425 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
6426 SmallVector<bool, 16> Done(NumElements, false);
6427
6428 unsigned NumConstants = 0;
6429 for (unsigned I = 0; I < NumElements; ++I) {
6430 SDValue Elem = Elems[I];
6431 if (Elem.getOpcode() == ISD::Constant ||
6432 Elem.getOpcode() == ISD::ConstantFP) {
6433 NumConstants += 1;
6434 Constants[I] = Elem;
6435 Done[I] = true;
6436 }
6437 }
6438 // If there was at least one constant, fill in the other elements of
6439 // Constants with undefs to get a full vector constant and use that
6440 // as the starting point.
6441 SDValue Result;
6442 SDValue ReplicatedVal;
6443 if (NumConstants > 0) {
6444 for (unsigned I = 0; I < NumElements; ++I)
6445 if (!Constants[I].getNode())
6446 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
6447 Result = DAG.getBuildVector(VT, DL, Constants);
6448 } else {
6449 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
6450 // avoid a false dependency on any previous contents of the vector
6451 // register.
6452
6453 // Use a VLREP if at least one element is a load. Make sure to replicate
6454 // the load with the most elements having its value.
6455 std::map<const SDNode*, unsigned> UseCounts;
6456 SDNode *LoadMaxUses = nullptr;
6457 for (unsigned I = 0; I < NumElements; ++I)
6458 if (isVectorElementLoad(Elems[I])) {
6459 SDNode *Ld = Elems[I].getNode();
6460 unsigned Count = ++UseCounts[Ld];
6461 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < Count)
6462 LoadMaxUses = Ld;
6463 }
6464 if (LoadMaxUses != nullptr) {
6465 ReplicatedVal = SDValue(LoadMaxUses, 0);
6466 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
6467 } else {
6468 // Try to use VLVGP.
6469 unsigned I1 = NumElements / 2 - 1;
6470 unsigned I2 = NumElements - 1;
6471 bool Def1 = !Elems[I1].isUndef();
6472 bool Def2 = !Elems[I2].isUndef();
6473 if (Def1 || Def2) {
6474 SDValue Elem1 = Elems[Def1 ? I1 : I2];
6475 SDValue Elem2 = Elems[Def2 ? I2 : I1];
6476 Result = DAG.getNode(ISD::BITCAST, DL, VT,
6477 joinDwords(DAG, DL, Elem1, Elem2));
6478 Done[I1] = true;
6479 Done[I2] = true;
6480 } else
6481 Result = DAG.getUNDEF(VT);
6482 }
6483 }
6484
6485 // Use VLVGx to insert the other elements.
6486 for (unsigned I = 0; I < NumElements; ++I)
6487 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
6488 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
6489 DAG.getConstant(I, DL, MVT::i32));
6490 return Result;
6491}
6492
6493SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
6494 SelectionDAG &DAG) const {
6495 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
6496 SDLoc DL(Op);
6497 EVT VT = Op.getValueType();
6498
6499 if (BVN->isConstant()) {
6500 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
6501 return Op;
6502
6503 // Fall back to loading it from memory.
6504 return SDValue();
6505 }
6506
6507 // See if we should use shuffles to construct the vector from other vectors.
6508 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
6509 return Res;
6510
6511 // Detect SCALAR_TO_VECTOR conversions.
6512 if (isScalarToVector(Op))
6513 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
6514
6515 // Otherwise use buildVector to build the vector up from GPRs.
6516 unsigned NumElements = Op.getNumOperands();
6517 SmallVector<SDValue, 16> Ops(NumElements);
6518 for (unsigned I = 0; I < NumElements; ++I)
6519 Ops[I] = Op.getOperand(I);
6520 return buildVector(DAG, DL, VT, Ops);
6521}
6522
6523SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
6524 SelectionDAG &DAG) const {
6525 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
6526 SDLoc DL(Op);
6527 EVT VT = Op.getValueType();
6528 unsigned NumElements = VT.getVectorNumElements();
6529
6530 if (VSN->isSplat()) {
6531 SDValue Op0 = Op.getOperand(0);
6532 unsigned Index = VSN->getSplatIndex();
6533 assert(Index < VT.getVectorNumElements() &&
6534 "Splat index should be defined and in first operand");
6535 // See whether the value we're splatting is directly available as a scalar.
6536 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6537 Op0.getOpcode() == ISD::BUILD_VECTOR)
6538 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
6539 // Otherwise keep it as a vector-to-vector operation.
6540 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
6541 DAG.getTargetConstant(Index, DL, MVT::i32));
6542 }
6543
6544 GeneralShuffle GS(VT);
6545 for (unsigned I = 0; I < NumElements; ++I) {
6546 int Elt = VSN->getMaskElt(I);
6547 if (Elt < 0)
6548 GS.addUndef();
6549 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
6550 unsigned(Elt) % NumElements))
6551 return SDValue();
6552 }
6553 return GS.getNode(DAG, SDLoc(VSN));
6554}
6555
6556SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
6557 SelectionDAG &DAG) const {
6558 SDLoc DL(Op);
6559 // Just insert the scalar into element 0 of an undefined vector.
6560 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
6561 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
6562 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
6563}
6564
6565SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
6566 SelectionDAG &DAG) const {
6567 // Handle insertions of floating-point values.
6568 SDLoc DL(Op);
6569 SDValue Op0 = Op.getOperand(0);
6570 SDValue Op1 = Op.getOperand(1);
6571 SDValue Op2 = Op.getOperand(2);
6572 EVT VT = Op.getValueType();
6573
6574 // Insertions into constant indices of a v2f64 can be done using VPDI.
6575 // However, if the inserted value is a bitcast or a constant then it's
6576 // better to use GPRs, as below.
6577 if (VT == MVT::v2f64 &&
6578 Op1.getOpcode() != ISD::BITCAST &&
6579 Op1.getOpcode() != ISD::ConstantFP &&
6580 Op2.getOpcode() == ISD::Constant) {
6581 uint64_t Index = Op2->getAsZExtVal();
6582 unsigned Mask = VT.getVectorNumElements() - 1;
6583 if (Index <= Mask)
6584 return Op;
6585 }
6586
6587 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
6588 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
6589 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
6590 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
6591 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
6592 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
6593 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6594}
6595
6596SDValue
6597SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
6598 SelectionDAG &DAG) const {
6599 // Handle extractions of floating-point values.
6600 SDLoc DL(Op);
6601 SDValue Op0 = Op.getOperand(0);
6602 SDValue Op1 = Op.getOperand(1);
6603 EVT VT = Op.getValueType();
6604 EVT VecVT = Op0.getValueType();
6605
6606 // Extractions of constant indices can be done directly.
6607 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
6608 uint64_t Index = CIndexN->getZExtValue();
6609 unsigned Mask = VecVT.getVectorNumElements() - 1;
6610 if (Index <= Mask)
6611 return Op;
6612 }
6613
6614 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
6615 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
6616 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
6617 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
6618 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
6619 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6620}
6621
6622SDValue SystemZTargetLowering::
6623lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6624 SDValue PackedOp = Op.getOperand(0);
6625 EVT OutVT = Op.getValueType();
6626 EVT InVT = PackedOp.getValueType();
6627 unsigned ToBits = OutVT.getScalarSizeInBits();
6628 unsigned FromBits = InVT.getScalarSizeInBits();
6629 unsigned StartOffset = 0;
6630
6631 // If the input is a VECTOR_SHUFFLE, there are a number of important
6632 // cases where we can directly implement the sign-extension of the
6633 // original input lanes of the shuffle.
6634 if (PackedOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
6635 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(PackedOp.getNode());
6636 ArrayRef<int> ShuffleMask = SVN->getMask();
6637 int OutNumElts = OutVT.getVectorNumElements();
6638
6639 // Recognize the special case where the sign-extension can be done
6640 // by the VSEG instruction. Handled via the default expander.
6641 if (ToBits == 64 && OutNumElts == 2) {
6642 int NumElem = ToBits / FromBits;
6643 if (ShuffleMask[0] == NumElem - 1 && ShuffleMask[1] == 2 * NumElem - 1)
6644 return SDValue();
6645 }
6646
6647 // Recognize the special case where we can fold the shuffle by
6648 // replacing some of the UNPACK_HIGH with UNPACK_LOW.
6649 int StartOffsetCandidate = -1;
6650 for (int Elt = 0; Elt < OutNumElts; Elt++) {
6651 if (ShuffleMask[Elt] == -1)
6652 continue;
6653 if (ShuffleMask[Elt] % OutNumElts == Elt) {
6654 if (StartOffsetCandidate == -1)
6655 StartOffsetCandidate = ShuffleMask[Elt] - Elt;
6656 if (StartOffsetCandidate == ShuffleMask[Elt] - Elt)
6657 continue;
6658 }
6659 StartOffsetCandidate = -1;
6660 break;
6661 }
6662 if (StartOffsetCandidate != -1) {
6663 StartOffset = StartOffsetCandidate;
6664 PackedOp = PackedOp.getOperand(0);
6665 }
6666 }
6667
6668 do {
6669 FromBits *= 2;
6670 unsigned OutNumElts = SystemZ::VectorBits / FromBits;
6671 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), OutNumElts);
6672 unsigned Opcode = SystemZISD::UNPACK_HIGH;
6673 if (StartOffset >= OutNumElts) {
6674 Opcode = SystemZISD::UNPACK_LOW;
6675 StartOffset -= OutNumElts;
6676 }
6677 PackedOp = DAG.getNode(Opcode, SDLoc(PackedOp), OutVT, PackedOp);
6678 } while (FromBits != ToBits);
6679 return PackedOp;
6680}
6681
6682// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
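// For example, zero-extending v8i16 to v4i32 in-register builds the mask
// { 8, 0, 9, 1, 10, 2, 11, 3 } on v8i16, where indices 8..15 select the
// zero vector: each i32 result lane is a zero 16-bit element followed by
// the corresponding input element (big-endian), i.e. a zero extension.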
6683SDValue SystemZTargetLowering::
6684lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6685 SDValue PackedOp = Op.getOperand(0);
6686 SDLoc DL(Op);
6687 EVT OutVT = Op.getValueType();
6688 EVT InVT = PackedOp.getValueType();
6689 unsigned InNumElts = InVT.getVectorNumElements();
6690 unsigned OutNumElts = OutVT.getVectorNumElements();
6691 unsigned NumInPerOut = InNumElts / OutNumElts;
6692
6693 SDValue ZeroVec =
6694 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
6695
6696 SmallVector<int, 16> Mask(InNumElts);
6697 unsigned ZeroVecElt = InNumElts;
6698 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
6699 unsigned MaskElt = PackedElt * NumInPerOut;
6700 unsigned End = MaskElt + NumInPerOut - 1;
6701 for (; MaskElt < End; MaskElt++)
6702 Mask[MaskElt] = ZeroVecElt++;
6703 Mask[MaskElt] = PackedElt;
6704 }
6705 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6706 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6707}
6708
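// Lower a vector shift or rotate (SHL/SRL/SRA/ROTL) to the *_BY_SCALAR form
// when the shift amount is a splat, so a single immediate or GPR amount can
// be applied to all elements.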
6709SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6710 unsigned ByScalar) const {
6711 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6712 SDValue Op0 = Op.getOperand(0);
6713 SDValue Op1 = Op.getOperand(1);
6714 SDLoc DL(Op);
6715 EVT VT = Op.getValueType();
6716 unsigned ElemBitSize = VT.getScalarSizeInBits();
6717
6718 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6719 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6720 APInt SplatBits, SplatUndef;
6721 unsigned SplatBitSize;
6722 bool HasAnyUndefs;
6723 // Check for constant splats. Use ElemBitSize as the minimum element
6724 // width and reject splats that need wider elements.
6725 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6726 ElemBitSize, true) &&
6727 SplatBitSize == ElemBitSize) {
6728 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6729 DL, MVT::i32);
6730 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6731 }
6732 // Check for variable splats.
6733 BitVector UndefElements;
6734 SDValue Splat = BVN->getSplatValue(&UndefElements);
6735 if (Splat) {
6736 // Since i32 is the smallest legal type, we either need a no-op
6737 // or a truncation.
6738 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6739 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6740 }
6741 }
6742
6743 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6744 // and the shift amount is directly available in a GPR.
6745 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6746 if (VSN->isSplat()) {
6747 SDValue VSNOp0 = VSN->getOperand(0);
6748 unsigned Index = VSN->getSplatIndex();
6749 assert(Index < VT.getVectorNumElements() &&
6750 "Splat index should be defined and in first operand");
6751 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6752 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6753 // Since i32 is the smallest legal type, we either need a no-op
6754 // or a truncation.
6755 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6756 VSNOp0.getOperand(Index));
6757 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6758 }
6759 }
6760 }
6761
6762 // Otherwise just treat the current form as legal.
6763 return Op;
6764}
6765
6766SDValue SystemZTargetLowering::lowerFSHL(SDValue Op, SelectionDAG &DAG) const {
6767 SDLoc DL(Op);
6768
6769 // i128 FSHL with a constant amount that is a multiple of 8 can be
6770 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6771 // facility, FSHL with a constant amount less than 8 can be implemented
6772 // via SHL_DOUBLE_BIT, and FSHL with other constant amounts by a
6773 // combination of the two.
6774 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6775 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6776 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6777 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6778 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6779 if (ShiftAmt > 120) {
6780 // For N in 121..128, fshl N == fshr (128 - N), and for 1 <= N < 8
6781 // SHR_DOUBLE_BIT emits fewer instructions.
6782 SDValue Val =
6783 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6784 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6785 return DAG.getBitcast(MVT::i128, Val);
6786 }
6787 SmallVector<int, 16> Mask(16);
6788 for (unsigned Elt = 0; Elt < 16; Elt++)
6789 Mask[Elt] = (ShiftAmt >> 3) + Elt;
6790 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6791 if ((ShiftAmt & 7) == 0)
6792 return DAG.getBitcast(MVT::i128, Shuf1);
6793 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op1, Op1, Mask);
6794 SDValue Val =
6795 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Shuf1, Shuf2,
6796 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6797 return DAG.getBitcast(MVT::i128, Val);
6798 }
6799 }
6800
6801 return SDValue();
6802}
6803
6804SDValue SystemZTargetLowering::lowerFSHR(SDValue Op, SelectionDAG &DAG) const {
6805 SDLoc DL(Op);
6806
6807 // i128 FSHR with a constant amount that is a multiple of 8 can be
6808 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6809 // facility, FSHR with a constant amount less than 8 can be implemented
6810 // via SHR_DOUBLE_BIT, and FSHR with other constant amounts by a
6811 // combination of the two.
6812 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6813 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6814 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6815 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6816 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6817 if (ShiftAmt > 120) {
6818 // For N in 121..128, fshr N == fshl (128 - N), and for 1 <= N < 8
6819 // SHL_DOUBLE_BIT emits fewer instructions.
6820 SDValue Val =
6821 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6822 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6823 return DAG.getBitcast(MVT::i128, Val);
6824 }
6825 SmallVector<int, 16> Mask(16);
6826 for (unsigned Elt = 0; Elt < 16; Elt++)
6827 Mask[Elt] = 16 - (ShiftAmt >> 3) + Elt;
6828 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6829 if ((ShiftAmt & 7) == 0)
6830 return DAG.getBitcast(MVT::i128, Shuf1);
6831 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op0, Mask);
6832 SDValue Val =
6833 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Shuf2, Shuf1,
6834 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6835 return DAG.getBitcast(MVT::i128, Val);
6836 }
6837 }
6838
6839 return SDValue();
6840}
6841
6842SDValue SystemZTargetLowering::lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG) const {
6843 SDLoc DL(Op);
6844 SDValue Src = Op.getOperand(0);
6845 MVT DstVT = Op.getSimpleValueType();
6846
6847 AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op);
6848 unsigned SrcAS = N->getSrcAddressSpace();
6849
6850 assert(SrcAS != N->getDestAddressSpace() &&
6851 "addrspacecast must be between different address spaces");
6852
6853 // addrspacecast [0 <- 1] : Assigning a ptr32 value to a 64-bit pointer.
6854 // addrspacecast [1 <- 0] : Assigning a 64-bit pointer to a ptr32 value.
6855 if (SrcAS == SYSTEMZAS::PTR32 && DstVT == MVT::i64) {
6856 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Src,
6857 DAG.getConstant(0x7fffffff, DL, MVT::i32));
6858 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
6859 } else if (DstVT == MVT::i32) {
6860 Op = DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src);
6861 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Op,
6862 DAG.getConstant(0x7fffffff, DL, MVT::i32));
6863 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
6864 } else {
6865 report_fatal_error("Bad address space in addrspacecast");
6866 }
6867 return Op;
6868}
6869
6870SDValue SystemZTargetLowering::lowerFP_EXTEND(SDValue Op,
6871 SelectionDAG &DAG) const {
6872 SDValue In = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
6873 if (In.getSimpleValueType() != MVT::f16)
6874 return Op; // Legal
6875 return SDValue(); // Let legalizer emit the libcall.
6876}
6877
6878SDValue SystemZTargetLowering::useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
6879 MVT VT, SDValue Arg, SDLoc DL,
6880 SDValue Chain, bool IsStrict) const {
6881 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
6882 MakeLibCallOptions CallOptions;
6883 SDValue Result;
6884 std::tie(Result, Chain) =
6885 makeLibCall(DAG, LC, VT, Arg, CallOptions, DL, Chain);
6886 return IsStrict ? DAG.getMergeValues({Result, Chain}, DL) : Result;
6887}
6888
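// Lower FP_TO_SINT/FP_TO_UINT and their STRICT_ variants. f16 sources go via
// f32, i128 results go via a libcall, and unsigned conversions without the
// fp-extension facility use the default expansion.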
6889SDValue SystemZTargetLowering::lower_FP_TO_INT(SDValue Op,
6890 SelectionDAG &DAG) const {
6891 bool IsSigned = (Op->getOpcode() == ISD::FP_TO_SINT ||
6892 Op->getOpcode() == ISD::STRICT_FP_TO_SINT);
6893 bool IsStrict = Op->isStrictFPOpcode();
6894 SDLoc DL(Op);
6895 MVT VT = Op.getSimpleValueType();
6896 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
6897 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
6898 EVT InVT = InOp.getValueType();
6899
6900 // FP to unsigned is not directly supported on z10. Promoting an i32
6901 // result to (signed) i64 doesn't generate an inexact condition (fp
6902 // exception) for values that are outside the i32 range but in the i64
6903 // range, so use the default expansion.
6904 if (!Subtarget.hasFPExtension() && !IsSigned)
6905 // Expand i32/i64. F16 values will be recognized to fit and extended.
6906 return SDValue();
6907
6908 // Conversion from f16 is done via f32.
6909 if (InOp.getSimpleValueType() == MVT::f16) {
6910 SmallVector<SDValue, 1> Results;
6911 LowerOperationWrapper(Op.getNode(), Results, DAG);
6912 return DAG.getMergeValues(Results, DL);
6913 }
6914
6915 if (VT == MVT::i128) {
6916 RTLIB::Libcall LC =
6917 IsSigned ? RTLIB::getFPTOSINT(InVT, VT) : RTLIB::getFPTOUINT(InVT, VT);
6918 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
6919 }
6920
6921 return Op; // Legal
6922}
6923
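// Lower SINT_TO_FP/UINT_TO_FP and their STRICT_ variants. f16 results go via
// f32, i128 sources go via a libcall, and unsigned conversions without the
// fp-extension facility use the default expansion.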
6924SDValue SystemZTargetLowering::lower_INT_TO_FP(SDValue Op,
6925 SelectionDAG &DAG) const {
6926 bool IsSigned = (Op->getOpcode() == ISD::SINT_TO_FP ||
6927 Op->getOpcode() == ISD::STRICT_SINT_TO_FP);
6928 bool IsStrict = Op->isStrictFPOpcode();
6929 SDLoc DL(Op);
6930 MVT VT = Op.getSimpleValueType();
6931 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
6932 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
6933 EVT InVT = InOp.getValueType();
6934
6935 // Conversion to f16 is done via f32.
6936 if (VT == MVT::f16) {
6937 SmallVector<SDValue, 1> Results;
6938 LowerOperationWrapper(Op.getNode(), Results, DAG);
6939 return DAG.getMergeValues(Results, DL);
6940 }
6941
6942 // Unsigned to fp is not directly supported on z10.
6943 if (!Subtarget.hasFPExtension() && !IsSigned)
6944 return SDValue(); // Expand i64.
6945
6946 if (InVT == MVT::i128) {
6947 RTLIB::Libcall LC =
6948 IsSigned ? RTLIB::getSINTTOFP(InVT, VT) : RTLIB::getUINTTOFP(InVT, VT);
6949 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
6950 }
6951
6952 return Op; // Legal
6953}
6954
6955// Shift the lower 2 bytes of Op to the left in order to insert into the
6956// upper 2 bytes of the FP register.
6957static SDValue convertToF16(SDValue Op, SelectionDAG &DAG) {
6958 assert(Op.getSimpleValueType() == MVT::i64 &&
6959 "Expected to convert i64 to f16.");
6960 SDLoc DL(Op);
6961 SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i64, Op,
6962 DAG.getConstant(48, DL, MVT::i64));
6963 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shft);
6964 SDValue F16Val =
6965 DAG.getTargetExtractSubreg(SystemZ::subreg_h16, DL, MVT::f16, BCast);
6966 return F16Val;
6967}
6968
6969// Extract Op into GPR and shift the 2 f16 bytes to the right.
6970static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG) {
6971 assert(Op.getSimpleValueType() == MVT::f16 &&
6972 "Expected to convert f16 to i64.");
6973 SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
6974 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL, MVT::f64,
6975 SDValue(U32, 0), Op);
6976 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
6977 SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i64, BCast,
6978 DAG.getConstant(48, DL, MVT::i32));
6979 return Shft;
6980}
6981
6982// Lower an f16 LOAD in case of no vector support.
6983SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op,
6984 SelectionDAG &DAG) const {
6985 EVT RegVT = Op.getValueType();
6986 assert(RegVT == MVT::f16 && "Expected to lower an f16 load.");
6987 (void)RegVT;
6988
6989 // Load as integer.
6990 SDLoc DL(Op);
6991 SDValue NewLd;
6992 if (auto *AtomicLd = dyn_cast<AtomicSDNode>(Op.getNode())) {
6993 assert(EVT(RegVT) == AtomicLd->getMemoryVT() && "Unhandled f16 load");
6994 NewLd = DAG.getAtomicLoad(ISD::EXTLOAD, DL, MVT::i16, MVT::i64,
6995 AtomicLd->getChain(), AtomicLd->getBasePtr(),
6996 AtomicLd->getMemOperand());
6997 } else {
6998 LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
6999 assert(EVT(RegVT) == Ld->getMemoryVT() && "Unhandled f16 load");
7000 NewLd = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i64, Ld->getChain(),
7001 Ld->getBasePtr(), Ld->getPointerInfo(), MVT::i16,
7002 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
7003 }
7004 SDValue F16Val = convertToF16(NewLd, DAG);
7005 return DAG.getMergeValues({F16Val, NewLd.getValue(1)}, DL);
7006}
7007
7008// Lower an f16 STORE in case of no vector support.
7009SDValue SystemZTargetLowering::lowerStoreF16(SDValue Op,
7010 SelectionDAG &DAG) const {
7011 SDLoc DL(Op);
7012 SDValue Shft = convertFromF16(Op->getOperand(1), DL, DAG);
7013
7014 if (auto *AtomicSt = dyn_cast<AtomicSDNode>(Op.getNode()))
7015 return DAG.getAtomic(ISD::ATOMIC_STORE, DL, MVT::i16, AtomicSt->getChain(),
7016 Shft, AtomicSt->getBasePtr(),
7017 AtomicSt->getMemOperand());
7018
7019 StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
7020 return DAG.getTruncStore(St->getChain(), DL, Shft, St->getBasePtr(), MVT::i16,
7021 St->getMemOperand());
7022}
7023
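// Lower IS_FPCLASS by translating the fcXXX test mask into the equivalent
// TEST DATA CLASS (TDC) mask and emitting a SystemZISD::TDC node whose CC
// result encodes the answer.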
7024SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
7025 SelectionDAG &DAG) const {
7026 SDLoc DL(Op);
7027 MVT ResultVT = Op.getSimpleValueType();
7028 SDValue Arg = Op.getOperand(0);
7029 unsigned Check = Op.getConstantOperandVal(1);
7030
7031 unsigned TDCMask = 0;
7032 if (Check & fcSNan)
7033 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
7034 if (Check & fcQNan)
7035 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
7036 if (Check & fcPosInf)
7037 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
7038 if (Check & fcNegInf)
7039 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
7040 if (Check & fcPosNormal)
7041 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
7042 if (Check & fcNegNormal)
7043 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
7044 if (Check & fcPosSubnormal)
7045 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
7046 if (Check & fcNegSubnormal)
7047 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
7048 if (Check & fcPosZero)
7049 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
7050 if (Check & fcNegZero)
7051 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
7052 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
7053
7054 if (Arg.getSimpleValueType() == MVT::f16)
7055 Arg = DAG.getFPExtendOrRound(Arg, SDLoc(Arg), MVT::f32);
7056 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
7057 return getCCResult(DAG, Intr);
7058}
7059
7060SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
7061 SelectionDAG &DAG) const {
7062 SDLoc DL(Op);
7063 SDValue Chain = Op.getOperand(0);
7064
7065 // STCKF only supports a memory operand, so we have to use a temporary.
7066 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
7067 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
7068 MachinePointerInfo MPI =
7069 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
7070
7071 // Use STCKF to store the TOD clock into the temporary.
7072 SDValue StoreOps[] = {Chain, StackPtr};
7073 Chain = DAG.getMemIntrinsicNode(
7074 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
7075 MPI, MaybeAlign(), MachineMemOperand::MOStore);
7076
7077 // And read it back from there.
7078 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
7079}
7080
7081SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
7082 SelectionDAG &DAG) const {
7083 switch (Op.getOpcode()) {
7084 case ISD::FRAMEADDR:
7085 return lowerFRAMEADDR(Op, DAG);
7086 case ISD::RETURNADDR:
7087 return lowerRETURNADDR(Op, DAG);
7088 case ISD::BR_CC:
7089 return lowerBR_CC(Op, DAG);
7090 case ISD::SELECT_CC:
7091 return lowerSELECT_CC(Op, DAG);
7092 case ISD::SETCC:
7093 return lowerSETCC(Op, DAG);
7094 case ISD::STRICT_FSETCC:
7095 return lowerSTRICT_FSETCC(Op, DAG, false);
7096 case ISD::STRICT_FSETCCS:
7097 return lowerSTRICT_FSETCC(Op, DAG, true);
7098 case ISD::GlobalAddress:
7099 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
7100 case ISD::GlobalTLSAddress:
7101 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
7102 case ISD::BlockAddress:
7103 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
7104 case ISD::JumpTable:
7105 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
7106 case ISD::ConstantPool:
7107 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
7108 case ISD::BITCAST:
7109 return lowerBITCAST(Op, DAG);
7110 case ISD::VASTART:
7111 return lowerVASTART(Op, DAG);
7112 case ISD::VACOPY:
7113 return lowerVACOPY(Op, DAG);
7114 case ISD::DYNAMIC_STACKALLOC:
7115 return lowerDYNAMIC_STACKALLOC(Op, DAG);
7116 case ISD::GET_DYNAMIC_AREA_OFFSET:
7117 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
7118 case ISD::MULHS:
7119 return lowerMULH(Op, DAG, SystemZISD::SMUL_LOHI);
7120 case ISD::MULHU:
7121 return lowerMULH(Op, DAG, SystemZISD::UMUL_LOHI);
7122 case ISD::SMUL_LOHI:
7123 return lowerSMUL_LOHI(Op, DAG);
7124 case ISD::UMUL_LOHI:
7125 return lowerUMUL_LOHI(Op, DAG);
7126 case ISD::SDIVREM:
7127 return lowerSDIVREM(Op, DAG);
7128 case ISD::UDIVREM:
7129 return lowerUDIVREM(Op, DAG);
7130 case ISD::SADDO:
7131 case ISD::SSUBO:
7132 case ISD::UADDO:
7133 case ISD::USUBO:
7134 return lowerXALUO(Op, DAG);
7135 case ISD::UADDO_CARRY:
7136 case ISD::USUBO_CARRY:
7137 return lowerUADDSUBO_CARRY(Op, DAG);
7138 case ISD::OR:
7139 return lowerOR(Op, DAG);
7140 case ISD::CTPOP:
7141 return lowerCTPOP(Op, DAG);
7142 case ISD::VECREDUCE_ADD:
7143 return lowerVECREDUCE_ADD(Op, DAG);
7144 case ISD::ATOMIC_FENCE:
7145 return lowerATOMIC_FENCE(Op, DAG);
7146 case ISD::ATOMIC_SWAP:
7147 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
7148 case ISD::ATOMIC_STORE:
7149 return lowerATOMIC_STORE(Op, DAG);
7150 case ISD::ATOMIC_LOAD:
7151 return lowerATOMIC_LOAD(Op, DAG);
7152 case ISD::ATOMIC_LOAD_ADD:
7153 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
7154 case ISD::ATOMIC_LOAD_SUB:
7155 return lowerATOMIC_LOAD_SUB(Op, DAG);
7156 case ISD::ATOMIC_LOAD_AND:
7157 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
7158 case ISD::ATOMIC_LOAD_OR:
7159 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
7160 case ISD::ATOMIC_LOAD_XOR:
7161 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
7162 case ISD::ATOMIC_LOAD_NAND:
7163 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
7164 case ISD::ATOMIC_LOAD_MIN:
7165 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
7166 case ISD::ATOMIC_LOAD_MAX:
7167 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
7168 case ISD::ATOMIC_LOAD_UMIN:
7169 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
7170 case ISD::ATOMIC_LOAD_UMAX:
7171 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
7172 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
7173 return lowerATOMIC_CMP_SWAP(Op, DAG);
7174 case ISD::STACKSAVE:
7175 return lowerSTACKSAVE(Op, DAG);
7176 case ISD::STACKRESTORE:
7177 return lowerSTACKRESTORE(Op, DAG);
7178 case ISD::PREFETCH:
7179 return lowerPREFETCH(Op, DAG);
7180 case ISD::INTRINSIC_W_CHAIN:
7181 return lowerINTRINSIC_W_CHAIN(Op, DAG);
7182 case ISD::INTRINSIC_WO_CHAIN:
7183 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
7184 case ISD::BUILD_VECTOR:
7185 return lowerBUILD_VECTOR(Op, DAG);
7186 case ISD::VECTOR_SHUFFLE:
7187 return lowerVECTOR_SHUFFLE(Op, DAG);
7188 case ISD::SCALAR_TO_VECTOR:
7189 return lowerSCALAR_TO_VECTOR(Op, DAG);
7190 case ISD::INSERT_VECTOR_ELT:
7191 return lowerINSERT_VECTOR_ELT(Op, DAG);
7192 case ISD::EXTRACT_VECTOR_ELT:
7193 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7194 case ISD::SIGN_EXTEND_VECTOR_INREG:
7195 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
7196 case ISD::ZERO_EXTEND_VECTOR_INREG:
7197 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
7198 case ISD::SHL:
7199 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
7200 case ISD::SRL:
7201 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
7202 case ISD::SRA:
7203 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
7204 case ISD::ADDRSPACECAST:
7205 return lowerAddrSpaceCast(Op, DAG);
7206 case ISD::ROTL:
7207 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
7208 case ISD::FSHL:
7209 return lowerFSHL(Op, DAG);
7210 case ISD::FSHR:
7211 return lowerFSHR(Op, DAG);
7212 case ISD::FP_EXTEND:
7213 case ISD::STRICT_FP_EXTEND:
7214 return lowerFP_EXTEND(Op, DAG);
7215 case ISD::FP_TO_UINT:
7216 case ISD::FP_TO_SINT:
7217 case ISD::STRICT_FP_TO_UINT:
7218 case ISD::STRICT_FP_TO_SINT:
7219 return lower_FP_TO_INT(Op, DAG);
7220 case ISD::UINT_TO_FP:
7221 case ISD::SINT_TO_FP:
7222 case ISD::STRICT_UINT_TO_FP:
7223 case ISD::STRICT_SINT_TO_FP:
7224 return lower_INT_TO_FP(Op, DAG);
7225 case ISD::LOAD:
7226 return lowerLoadF16(Op, DAG);
7227 case ISD::STORE:
7228 return lowerStoreF16(Op, DAG);
7229 case ISD::IS_FPCLASS:
7230 return lowerIS_FPCLASS(Op, DAG);
7231 case ISD::GET_ROUNDING:
7232 return lowerGET_ROUNDING(Op, DAG);
7233 case ISD::READCYCLECOUNTER:
7234 return lowerREADCYCLECOUNTER(Op, DAG);
7235 case ISD::EH_SJLJ_SETJMP:
7236 case ISD::EH_SJLJ_LONGJMP:
7237 // These operations are legal on our platform, but we cannot actually
7238 // set the operation action to Legal as common code would treat this
7239 // as equivalent to Expand. Instead, we keep the operation action to
7240 // Custom and just leave them unchanged here.
7241 return Op;
7242
7243 default:
7244 llvm_unreachable("Unexpected node to lower");
7245 }
7246}
7247
7248static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
7249 const SDLoc &SL) {
7250 // If i128 is legal, just use a normal bitcast.
7251 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7252 return DAG.getBitcast(MVT::f128, Src);
7253
7254 // Otherwise, f128 must live in FP128, so do a partwise move.
7255 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
7256 &SystemZ::FP128BitRegClass);
7257
7258 SDValue Hi, Lo;
7259 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
7260
7261 Hi = DAG.getBitcast(MVT::f64, Hi);
7262 Lo = DAG.getBitcast(MVT::f64, Lo);
7263
7264 SDNode *Pair = DAG.getMachineNode(
7265 SystemZ::REG_SEQUENCE, SL, MVT::f128,
7266 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
7267 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
7268 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
7269 return SDValue(Pair, 0);
7270}
7271
7272static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
7273 const SDLoc &SL) {
7274 // If i128 is legal, just use a normal bitcast.
7275 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7276 return DAG.getBitcast(MVT::i128, Src);
7277
7278 // Otherwise, f128 must live in FP128, so do a partwise move.
7279 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
7280 &SystemZ::FP128BitRegClass);
7281
7282 SDValue LoFP =
7283 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
7284 SDValue HiFP =
7285 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
7286 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
7287 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
7288
7289 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
7290}
7291
7292// Lower operations with invalid operand or result types.
7293void
7294SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
7295 SmallVectorImpl<SDValue> &Results,
7296 SelectionDAG &DAG) const {
7297 switch (N->getOpcode()) {
7298 case ISD::ATOMIC_LOAD: {
7299 SDLoc DL(N);
7300 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
7301 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
7302 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7303 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
7304 DL, Tys, Ops, MVT::i128, MMO);
7305
7306 SDValue Lowered = lowerGR128ToI128(DAG, Res);
7307 if (N->getValueType(0) == MVT::f128)
7308 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
7309 Results.push_back(Lowered);
7310 Results.push_back(Res.getValue(1));
7311 break;
7312 }
7313 case ISD::ATOMIC_STORE: {
7314 SDLoc DL(N);
7315 SDVTList Tys = DAG.getVTList(MVT::Other);
7316 SDValue Val = N->getOperand(1);
7317 if (Val.getValueType() == MVT::f128)
7318 Val = expandBitCastF128ToI128(DAG, Val, DL);
7319 Val = lowerI128ToGR128(DAG, Val);
7320
7321 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
7322 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7323 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
7324 DL, Tys, Ops, MVT::i128, MMO);
7325 // We have to enforce sequential consistency by performing a
7326 // serialization operation after the store.
7327 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
7328 AtomicOrdering::SequentiallyConsistent)
7329 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
7330 MVT::Other, Res), 0);
7331 Results.push_back(Res);
7332 break;
7333 }
7334 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
7335 SDLoc DL(N);
7336 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
7337 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
7338 lowerI128ToGR128(DAG, N->getOperand(2)),
7339 lowerI128ToGR128(DAG, N->getOperand(3)) };
7340 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7341 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
7342 DL, Tys, Ops, MVT::i128, MMO);
7343 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
7344 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
7345 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
7346 Results.push_back(lowerGR128ToI128(DAG, Res));
7347 Results.push_back(Success);
7348 Results.push_back(Res.getValue(2));
7349 break;
7350 }
7351 case ISD::BITCAST: {
7352 if (useSoftFloat())
7353 return;
7354 SDLoc DL(N);
7355 SDValue Src = N->getOperand(0);
7356 EVT SrcVT = Src.getValueType();
7357 EVT ResVT = N->getValueType(0);
7358 if (ResVT == MVT::i128 && SrcVT == MVT::f128)
7359 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
7360 else if (SrcVT == MVT::i16 && ResVT == MVT::f16) {
7361 if (Subtarget.hasVector()) {
7362 SDValue In32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src);
7363 Results.push_back(SDValue(
7364 DAG.getMachineNode(SystemZ::LEFR_16, DL, MVT::f16, In32), 0));
7365 } else {
7366 SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Src);
7367 Results.push_back(convertToF16(In64, DAG));
7368 }
7369 } else if (SrcVT == MVT::f16 && ResVT == MVT::i16) {
7370 SDValue ExtractedVal =
7371 Subtarget.hasVector()
7372 ? SDValue(DAG.getMachineNode(SystemZ::LFER_16, DL, MVT::i32, Src),
7373 0)
7374 : convertFromF16(Src, DL, DAG);
7375 Results.push_back(DAG.getZExtOrTrunc(ExtractedVal, DL, ResVT));
7376 }
7377 break;
7378 }
7379 case ISD::UINT_TO_FP:
7380 case ISD::SINT_TO_FP:
7381 case ISD::STRICT_UINT_TO_FP:
7382 case ISD::STRICT_SINT_TO_FP: {
7383 if (useSoftFloat())
7384 return;
7385 bool IsStrict = N->isStrictFPOpcode();
7386 SDLoc DL(N);
7387 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7388 EVT ResVT = N->getValueType(0);
7389 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7390 if (ResVT == MVT::f16) {
7391 if (!IsStrict) {
7392 SDValue OpF32 = DAG.getNode(N->getOpcode(), DL, MVT::f32, InOp);
7393 Results.push_back(DAG.getFPExtendOrRound(OpF32, DL, MVT::f16));
7394 } else {
7395 SDValue OpF32 =
7396 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other),
7397 {Chain, InOp});
7398 SDValue F16Res;
7399 std::tie(F16Res, Chain) = DAG.getStrictFPExtendOrRound(
7400 OpF32, OpF32.getValue(1), DL, MVT::f16);
7401 Results.push_back(F16Res);
7402 Results.push_back(Chain);
7403 }
7404 }
7405 break;
7406 }
7407 case ISD::FP_TO_UINT:
7408 case ISD::FP_TO_SINT:
7409 case ISD::STRICT_FP_TO_UINT:
7410 case ISD::STRICT_FP_TO_SINT: {
7411 if (useSoftFloat())
7412 return;
7413 bool IsStrict = N->isStrictFPOpcode();
7414 SDLoc DL(N);
7415 EVT ResVT = N->getValueType(0);
7416 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7417 EVT InVT = InOp->getValueType(0);
7418 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7419 if (InVT == MVT::f16) {
7420 if (!IsStrict) {
7421 SDValue InF32 = DAG.getFPExtendOrRound(InOp, DL, MVT::f32);
7422 Results.push_back(DAG.getNode(N->getOpcode(), DL, ResVT, InF32));
7423 } else {
7424 SDValue InF32;
7425 std::tie(InF32, Chain) =
7426 DAG.getStrictFPExtendOrRound(InOp, Chain, DL, MVT::f32);
7427 SDValue OpF32 =
7428 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(ResVT, MVT::Other),
7429 {Chain, InF32});
7430 Results.push_back(OpF32);
7431 Results.push_back(OpF32.getValue(1));
7432 }
7433 }
7434 break;
7435 }
7436 default:
7437 llvm_unreachable("Unexpected node to lower");
7438 }
7439}
7440
7441void
7442SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
7443 SmallVectorImpl<SDValue> &Results,
7444 SelectionDAG &DAG) const {
7445 return LowerOperationWrapper(N, Results, DAG);
7446}
7447
7448// Return true if VT is a vector whose elements are a whole number of bytes
7449// in width. Also check for presence of vector support.
7450bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
7451 if (!Subtarget.hasVector())
7452 return false;
7453
7454 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
7455}
7456
7457// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
7458// producing a result of type ResVT. Op is a possibly bitcast version
7459// of the input vector and Index is the index (based on type VecVT) that
7460// should be extracted. Return the new extraction if a simplification
7461// was possible or if Force is true.
7462SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
7463 EVT VecVT, SDValue Op,
7464 unsigned Index,
7465 DAGCombinerInfo &DCI,
7466 bool Force) const {
7467 SelectionDAG &DAG = DCI.DAG;
7468
7469 // The number of bytes being extracted.
7470 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7471
7472 for (;;) {
7473 unsigned Opcode = Op.getOpcode();
7474 if (Opcode == ISD::BITCAST)
7475 // Look through bitcasts.
7476 Op = Op.getOperand(0);
7477 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
7478 canTreatAsByteVector(Op.getValueType())) {
7479 // Get a VPERM-like permute mask and see whether the bytes covered
7480 // by the extracted element are a contiguous sequence from one
7481 // source operand.
7482 SmallVector<int, SystemZ::VectorBytes> Bytes;
7483 if (!getVPermMask(Op, Bytes))
7484 break;
7485 int First;
7486 if (!getShuffleInput(Bytes, Index * BytesPerElement,
7487 BytesPerElement, First))
7488 break;
7489 if (First < 0)
7490 return DAG.getUNDEF(ResVT);
7491 // Make sure the contiguous sequence starts at a multiple of the
7492 // original element size.
7493 unsigned Byte = unsigned(First) % Bytes.size();
7494 if (Byte % BytesPerElement != 0)
7495 break;
7496 // We can get the extracted value directly from an input.
7497 Index = Byte / BytesPerElement;
7498 Op = Op.getOperand(unsigned(First) / Bytes.size());
7499 Force = true;
7500 } else if (Opcode == ISD::BUILD_VECTOR &&
7501 canTreatAsByteVector(Op.getValueType())) {
7502 // We can only optimize this case if the BUILD_VECTOR elements are
7503 // at least as wide as the extracted value.
7504 EVT OpVT = Op.getValueType();
7505 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7506 if (OpBytesPerElement < BytesPerElement)
7507 break;
7508 // Make sure that the least-significant bit of the extracted value
7509 // is the least significant bit of an input.
7510 unsigned End = (Index + 1) * BytesPerElement;
7511 if (End % OpBytesPerElement != 0)
7512 break;
7513 // We're extracting the low part of one operand of the BUILD_VECTOR.
7514 Op = Op.getOperand(End / OpBytesPerElement - 1);
7515 if (!Op.getValueType().isInteger()) {
7516 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
7517 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
7518 DCI.AddToWorklist(Op.getNode());
7519 }
7520 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
7521 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7522 if (VT != ResVT) {
7523 DCI.AddToWorklist(Op.getNode());
7524 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
7525 }
7526 return Op;
7527 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
7528 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
7529 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
7530 canTreatAsByteVector(Op.getValueType()) &&
7531 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
7532 // Make sure that only the unextended bits are significant.
7533 EVT ExtVT = Op.getValueType();
7534 EVT OpVT = Op.getOperand(0).getValueType();
7535 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
7536 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7537 unsigned Byte = Index * BytesPerElement;
7538 unsigned SubByte = Byte % ExtBytesPerElement;
7539 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
7540 if (SubByte < MinSubByte ||
7541 SubByte + BytesPerElement > ExtBytesPerElement)
7542 break;
7543 // Get the byte offset of the unextended element
7544 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
7545 // ...then add the byte offset relative to that element.
7546 Byte += SubByte - MinSubByte;
7547 if (Byte % BytesPerElement != 0)
7548 break;
7549 Op = Op.getOperand(0);
7550 Index = Byte / BytesPerElement;
7551 Force = true;
7552 } else
7553 break;
7554 }
7555 if (Force) {
7556 if (Op.getValueType() != VecVT) {
7557 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
7558 DCI.AddToWorklist(Op.getNode());
7559 }
7560 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
7561 DAG.getConstant(Index, DL, MVT::i32));
7562 }
7563 return SDValue();
7564}
7565
7566// Optimize vector operations in scalar value Op on the basis that Op
7567// is truncated to TruncVT.
7568SDValue SystemZTargetLowering::combineTruncateExtract(
7569 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
7570 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
7571 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
7572 // of type TruncVT.
7573 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7574 TruncVT.getSizeInBits() % 8 == 0) {
7575 SDValue Vec = Op.getOperand(0);
7576 EVT VecVT = Vec.getValueType();
7577 if (canTreatAsByteVector(VecVT)) {
7578 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7579 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7580 unsigned TruncBytes = TruncVT.getStoreSize();
7581 if (BytesPerElement % TruncBytes == 0) {
7582 // Calculate the value of Y' in the above description. We are
7583 // splitting the original elements into Scale equal-sized pieces
7584 // and for truncation purposes want the last (least-significant)
7585 // of these pieces for IndexN. This is easiest to do by calculating
7586 // the start index of the following element and then subtracting 1.
7587 unsigned Scale = BytesPerElement / TruncBytes;
7588 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
7589
7590 // Defer the creation of the bitcast from X to combineExtract,
7591 // which might be able to optimize the extraction.
7592 VecVT = EVT::getVectorVT(*DCI.DAG.getContext(),
7593 MVT::getIntegerVT(TruncBytes * 8),
7594 VecVT.getStoreSize() / TruncBytes);
7595 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
7596 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
7597 }
7598 }
7599 }
7600 }
7601 return SDValue();
7602}
7603
7604SDValue SystemZTargetLowering::combineZERO_EXTEND(
7605 SDNode *N, DAGCombinerInfo &DCI) const {
7606 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
7607 SelectionDAG &DAG = DCI.DAG;
7608 SDValue N0 = N->getOperand(0);
7609 EVT VT = N->getValueType(0);
7610 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
7611 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
7612 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7613 if (TrueOp && FalseOp) {
7614 SDLoc DL(N0);
7615 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
7616 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
7617 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
7618 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
7619 // If N0 has multiple uses, change other uses as well.
7620 if (!N0.hasOneUse()) {
7621 SDValue TruncSelect =
7622 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
7623 DCI.CombineTo(N0.getNode(), TruncSelect);
7624 }
7625 return NewSelect;
7626 }
7627 }
7628 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
7629 // of the result is smaller than the size of X and all the truncated bits
7630 // of X are already zero.
7631 if (N0.getOpcode() == ISD::XOR &&
7632 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
7633 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7634 N0.getOperand(1).getOpcode() == ISD::Constant) {
7635 SDValue X = N0.getOperand(0).getOperand(0);
7636 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
7637 KnownBits Known = DAG.computeKnownBits(X);
7638 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
7639 N0.getValueSizeInBits(),
7640 VT.getSizeInBits());
7641 if (TruncatedBits.isSubsetOf(Known.Zero)) {
7642 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7643 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
7644 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
7645 X, DAG.getConstant(Mask, SDLoc(N0), VT));
7646 }
7647 }
7648 }
7649 // Recognize patterns for VECTOR SUBTRACT COMPUTE BORROW INDICATION
7650 // and VECTOR ADD COMPUTE CARRY for i128:
7651 // (zext (setcc_uge X Y)) --> (VSCBI X Y)
7652 // (zext (setcc_ule Y X)) --> (VSCBI X Y)
7653 // (zext (setcc_ult (add X Y) X/Y) -> (VACC X Y)
7654 // (zext (setcc_ugt X/Y (add X Y)) -> (VACC X Y)
7655 // For vector types, these patterns are recognized in the .td file.
7656 if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT == MVT::i128 &&
7657 N0.getOperand(0).getValueType() == VT) {
7658 SDValue Op0 = N0.getOperand(0);
7659 SDValue Op1 = N0.getOperand(1);
7660 const ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7661 switch (CC) {
7662 case ISD::SETULE:
7663 std::swap(Op0, Op1);
7664 [[fallthrough]];
7665 case ISD::SETUGE:
7666 return DAG.getNode(SystemZISD::VSCBI, SDLoc(N0), VT, Op0, Op1);
7667 case ISD::SETUGT:
7668 std::swap(Op0, Op1);
7669 [[fallthrough]];
7670 case ISD::SETULT:
7671 if (Op0->hasOneUse() && Op0->getOpcode() == ISD::ADD &&
7672 (Op0->getOperand(0) == Op1 || Op0->getOperand(1) == Op1))
7673 return DAG.getNode(SystemZISD::VACC, SDLoc(N0), VT, Op0->getOperand(0),
7674 Op0->getOperand(1));
7675 break;
7676 default:
7677 break;
7678 }
7679 }
7680
7681 return SDValue();
7682}
7683
7684SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
7685 SDNode *N, DAGCombinerInfo &DCI) const {
7686 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
7687 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
7688 // into (select_cc LHS, RHS, -1, 0, COND)
7689 SelectionDAG &DAG = DCI.DAG;
7690 SDValue N0 = N->getOperand(0);
7691 EVT VT = N->getValueType(0);
7692 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7693 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
7694 N0 = N0.getOperand(0);
7695 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
7696 SDLoc DL(N0);
7697 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
7698 DAG.getAllOnesConstant(DL, VT),
7699 DAG.getConstant(0, DL, VT), N0.getOperand(2) };
7700 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
7701 }
7702 return SDValue();
7703}
7704
7705SDValue SystemZTargetLowering::combineSIGN_EXTEND(
7706 SDNode *N, DAGCombinerInfo &DCI) const {
7707 // Convert (sext (ashr (shl X, C1), C2)) to
7708 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
7709 // cheap as narrower ones.
7710 SelectionDAG &DAG = DCI.DAG;
7711 SDValue N0 = N->getOperand(0);
7712 EVT VT = N->getValueType(0);
7713 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
7714 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7715 SDValue Inner = N0.getOperand(0);
7716 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
7717 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
7718 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
7719 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
7720 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
7721 EVT ShiftVT = N0.getOperand(1).getValueType();
7722 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
7723 Inner.getOperand(0));
7724 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
7725 DAG.getConstant(NewShlAmt, SDLoc(Inner),
7726 ShiftVT));
7727 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
7728 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
7729 }
7730 }
7731 }
7732
7733 return SDValue();
7734}
7735
7736SDValue SystemZTargetLowering::combineMERGE(
7737 SDNode *N, DAGCombinerInfo &DCI) const {
7738 SelectionDAG &DAG = DCI.DAG;
7739 unsigned Opcode = N->getOpcode();
7740 SDValue Op0 = N->getOperand(0);
7741 SDValue Op1 = N->getOperand(1);
7742 if (Op0.getOpcode() == ISD::BITCAST)
7743 Op0 = Op0.getOperand(0);
7744 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
7745 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
7746 // for v4f32.
7747 if (Op1 == N->getOperand(0))
7748 return Op1;
7749 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
7750 EVT VT = Op1.getValueType();
7751 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
7752 if (ElemBytes <= 4) {
7753 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
7754 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
7755 EVT InVT = VT.changeVectorElementTypeToInteger();
7756 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
7757 SystemZ::VectorBytes / ElemBytes / 2);
7758 if (VT != InVT) {
7759 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
7760 DCI.AddToWorklist(Op1.getNode());
7761 }
7762 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
7763 DCI.AddToWorklist(Op.getNode());
7764 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
7765 }
7766 }
7767 return SDValue();
7768}
7769
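// Return true if the only (non-chain) users of the i128 load LD extract its
// low and/or high 64-bit half via TRUNCATE (the high half behind an SRL by
// 64). On success, LoPart/HiPart point to the extracting nodes, or stay null
// if a half is unused.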
7770static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7771 SDNode *&HiPart) {
7772 LoPart = HiPart = nullptr;
7773
7774 // Scan through all users.
7775 for (SDUse &Use : LD->uses()) {
7776 // Skip the uses of the chain.
7777 if (Use.getResNo() != 0)
7778 continue;
7779
7780 // Verify every user is a TRUNCATE to i64 of the low or high half.
7781 SDNode *User = Use.getUser();
7782 bool IsLoPart = true;
7783 if (User->getOpcode() == ISD::SRL &&
7784 User->getOperand(1).getOpcode() == ISD::Constant &&
7785 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
7786 User = *User->user_begin();
7787 IsLoPart = false;
7788 }
7789 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
7790 return false;
7791
7792 if (IsLoPart) {
7793 if (LoPart)
7794 return false;
7795 LoPart = User;
7796 } else {
7797 if (HiPart)
7798 return false;
7799 HiPart = User;
7800 }
7801 }
7802 return true;
7803}
7804
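// Return true if the only (non-chain) users of the f128 load LD are
// EXTRACT_SUBREGs of its low and/or high 64-bit halves. On success,
// LoPart/HiPart point to the extracting nodes, or stay null.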
7805static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7806 SDNode *&HiPart) {
7807 LoPart = HiPart = nullptr;
7808
7809 // Scan through all users.
7810 for (SDUse &Use : LD->uses()) {
7811 // Skip the uses of the chain.
7812 if (Use.getResNo() != 0)
7813 continue;
7814
7815 // Verify every user is an EXTRACT_SUBREG of the low or high half.
7816 SDNode *User = Use.getUser();
7817 if (!User->hasOneUse() || !User->isMachineOpcode() ||
7818 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
7819 return false;
7820
7821 switch (User->getConstantOperandVal(1)) {
7822 case SystemZ::subreg_l64:
7823 if (LoPart)
7824 return false;
7825 LoPart = User;
7826 break;
7827 case SystemZ::subreg_h64:
7828 if (HiPart)
7829 return false;
7830 HiPart = User;
7831 break;
7832 default:
7833 return false;
7834 }
7835 }
7836 return true;
7837}
7838
7839SDValue SystemZTargetLowering::combineLOAD(
7840 SDNode *N, DAGCombinerInfo &DCI) const {
7841 SelectionDAG &DAG = DCI.DAG;
7842 EVT LdVT = N->getValueType(0);
7843 if (auto *LN = dyn_cast<LoadSDNode>(N)) {
7844 if (LN->getAddressSpace() == SYSTEMZAS::PTR32) {
7845 MVT PtrVT = getPointerTy(DAG.getDataLayout());
7846 MVT LoadNodeVT = LN->getBasePtr().getSimpleValueType();
7847 if (PtrVT != LoadNodeVT) {
7848 SDLoc DL(LN);
7849 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(
7850 DL, PtrVT, LN->getBasePtr(), SYSTEMZAS::PTR32, 0);
7851 return DAG.getExtLoad(LN->getExtensionType(), DL, LN->getValueType(0),
7852 LN->getChain(), AddrSpaceCast, LN->getMemoryVT(),
7853 LN->getMemOperand());
7854 }
7855 }
7856 }
7857 SDLoc DL(N);
7858
7859 // Replace a 128-bit load that is used solely to move its value into GPRs
7860 // by separate loads of both halves.
7861 LoadSDNode *LD = cast<LoadSDNode>(N);
7862 if (LD->isSimple() && ISD::isNormalLoad(LD)) {
7863 SDNode *LoPart, *HiPart;
7864 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
7865 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
7866 // Rewrite each extraction as an independent load.
7867 SmallVector<SDValue, 2> ArgChains;
7868 if (HiPart) {
7869 SDValue EltLoad = DAG.getLoad(
7870 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
7871 LD->getPointerInfo(), LD->getBaseAlign(),
7872 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7873
7874 DCI.CombineTo(HiPart, EltLoad, true);
7875 ArgChains.push_back(EltLoad.getValue(1));
7876 }
7877 if (LoPart) {
7878 SDValue EltLoad = DAG.getLoad(
7879 LoPart->getValueType(0), DL, LD->getChain(),
7880 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
7881 LD->getPointerInfo().getWithOffset(8), LD->getBaseAlign(),
7882 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7883
7884 DCI.CombineTo(LoPart, EltLoad, true);
7885 ArgChains.push_back(EltLoad.getValue(1));
7886 }
7887
7888 // Collect all chains via TokenFactor.
7889 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
7890 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
7891 DCI.AddToWorklist(Chain.getNode());
7892 return SDValue(N, 0);
7893 }
7894 }
7895
7896 if (LdVT.isVector() || LdVT.isInteger())
7897 return SDValue();
7898 // Transform a scalar load that is REPLICATEd and also has other use(s),
7899 // so that the other use(s) read the first element of the REPLICATE
7900 // instead of the load. Otherwise instruction selection will not
7901 // produce a VLREP. Avoid extracting to a GPR, so only do this for
7902 // floating-point loads.
7903
7904 SDValue Replicate;
7905 SmallVector<SDNode*, 8> OtherUses;
7906 for (SDUse &Use : N->uses()) {
7907 if (Use.getUser()->getOpcode() == SystemZISD::REPLICATE) {
7908 if (Replicate)
7909 return SDValue(); // Should never happen
7910 Replicate = SDValue(Use.getUser(), 0);
7911 } else if (Use.getResNo() == 0)
7912 OtherUses.push_back(Use.getUser());
7913 }
7914 if (!Replicate || OtherUses.empty())
7915 return SDValue();
7916
7917 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
7918 Replicate, DAG.getConstant(0, DL, MVT::i32));
7919 // Update uses of the loaded Value while preserving old chains.
7920 for (SDNode *U : OtherUses) {
7921 SmallVector<SDValue, 8> Ops;
7922 for (SDValue Op : U->ops())
7923 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
7924 DAG.UpdateNodeOperands(U, Ops);
7925 }
7926 return SDValue(N, 0);
7927}
7928
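// Return true if a load or store of VT can be combined with a BSWAP into a
// single byte-reversing load/store instruction (the vector and i128 cases
// require vector-enhancements-2).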
7929bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
7930 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
7931 return true;
7932 if (Subtarget.hasVectorEnhancements2())
7933 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
7934 return true;
7935 return false;
7936}
7937
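// Return true if the shuffle mask M reverses the order of the elements of
// the 128-bit vector type VT (ignoring undef lanes), i.e. it is the
// element-swap pattern matched for VLER/VSTER.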
7938static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
7939 if (!VT.isVector() || !VT.isSimple() ||
7940 VT.getSizeInBits() != 128 ||
7941 VT.getScalarSizeInBits() % 8 != 0)
7942 return false;
7943
7944 unsigned NumElts = VT.getVectorNumElements();
7945 for (unsigned i = 0; i < NumElts; ++i) {
7946 if (M[i] < 0) continue; // ignore UNDEF indices
7947 if ((unsigned) M[i] != NumElts - 1 - i)
7948 return false;
7949 }
7950
7951 return true;
7952}
7953
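// Return true if every user of StoredVal is either a store whose scalar
// memory type is a round byte size of at most 16 bytes, or a splat
// BUILD_VECTOR that is itself only used by such stores.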
7954static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
7955 for (auto *U : StoredVal->users()) {
7956 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
7957 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
7958 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
7959 continue;
7960 } else if (isa<BuildVectorSDNode>(U)) {
7961 SDValue BuildVector = SDValue(U, 0);
7962 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
7963 isOnlyUsedByStores(BuildVector, DAG))
7964 continue;
7965 }
7966 return false;
7967 }
7968 return true;
7969}
7970
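// Match an i128 value of the form (or (zext i64 Lo), (shl (anyext i64 Hi),
// 64)) and return the two i64 halves in LoPart and HiPart.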
7971static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
7972 SDValue &HiPart) {
7973 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
7974 return false;
7975
7976 SDValue Op0 = Val.getOperand(0);
7977 SDValue Op1 = Val.getOperand(1);
7978
7979 if (Op0.getOpcode() == ISD::SHL)
7980 std::swap(Op0, Op1);
7981 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
7982 Op1.getOperand(1).getOpcode() != ISD::Constant ||
7983 Op1.getConstantOperandVal(1) != 64)
7984 return false;
7985 Op1 = Op1.getOperand(0);
7986
7987 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
7988 Op0.getOperand(0).getValueType() != MVT::i64)
7989 return false;
7990 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
7991 Op1.getOperand(0).getValueType() != MVT::i64)
7992 return false;
7993
7994 LoPart = Op0.getOperand(0);
7995 HiPart = Op1.getOperand(0);
7996 return true;
7997}
7998
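// Match an f128 value constructed by a REG_SEQUENCE that inserts two f64
// halves into subreg_l64 and subreg_h64 of an FP128 register, returning the
// halves in LoPart and HiPart.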
7999static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
8000 SDValue &HiPart) {
8001 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
8002 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
8003 return false;
8004
8005 if (Val->getNumOperands() != 5 ||
8006 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
8007 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
8008 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
8009 return false;
8010
8011 LoPart = Val->getOperand(1);
8012 HiPart = Val->getOperand(3);
8013 return true;
8014}
8015
8016SDValue SystemZTargetLowering::combineSTORE(
8017 SDNode *N, DAGCombinerInfo &DCI) const {
8018 SelectionDAG &DAG = DCI.DAG;
8019 auto *SN = cast<StoreSDNode>(N);
8020 auto &Op1 = N->getOperand(1);
8021 EVT MemVT = SN->getMemoryVT();
8022
8023 if (SN->getAddressSpace() == SYSTEMZAS::PTR32) {
8024 MVT PtrVT = getPointerTy(DAG.getDataLayout());
8025 MVT StoreNodeVT = SN->getBasePtr().getSimpleValueType();
8026 if (PtrVT != StoreNodeVT) {
8027 SDLoc DL(SN);
8028 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(DL, PtrVT, SN->getBasePtr(),
8029 SYSTEMZAS::PTR32, 0);
8030 return DAG.getStore(SN->getChain(), DL, SN->getValue(), AddrSpaceCast,
8031 SN->getPointerInfo(), SN->getBaseAlign(),
8032 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8033 }
8034 }
8035
8036 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
8037 // for the extraction to be done on a vMiN value, so that we can use VSTE.
8038 // If X has wider elements then convert it to:
8039 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
8040 if (MemVT.isInteger() && SN->isTruncatingStore()) {
8041 if (SDValue Value =
8042 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
8043 DCI.AddToWorklist(Value.getNode());
8044
8045 // Rewrite the store with the new form of stored value.
8046 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
8047 SN->getBasePtr(), SN->getMemoryVT(),
8048 SN->getMemOperand());
8049 }
8050 }
8051 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
8052 if (!SN->isTruncatingStore() &&
8053 Op1.getOpcode() == ISD::BSWAP &&
8054 Op1.getNode()->hasOneUse() &&
8055 canLoadStoreByteSwapped(Op1.getValueType())) {
8056
8057 SDValue BSwapOp = Op1.getOperand(0);
8058
8059 if (BSwapOp.getValueType() == MVT::i16)
8060 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
8061
8062 SDValue Ops[] = {
8063 N->getOperand(0), BSwapOp, N->getOperand(2)
8064 };
8065
8066 return
8067 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
8068 Ops, MemVT, SN->getMemOperand());
8069 }
8070 // Combine STORE (element-swap) into VSTER
8071 if (!SN->isTruncatingStore() &&
8072 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
8073 Op1.getNode()->hasOneUse() &&
8074 Subtarget.hasVectorEnhancements2()) {
8075 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
8076 ArrayRef<int> ShuffleMask = SVN->getMask();
8077 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
8078 SDValue Ops[] = {
8079 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
8080 };
8081
8082 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
8083 DAG.getVTList(MVT::Other),
8084 Ops, MemVT, SN->getMemOperand());
8085 }
8086 }
8087
8088 // Combine STORE (READCYCLECOUNTER) into STCKF.
8089 if (!SN->isTruncatingStore() &&
8090 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
8091 Op1.hasOneUse() &&
8092 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
8093 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
8094 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
8095 DAG.getVTList(MVT::Other),
8096 Ops, MemVT, SN->getMemOperand());
8097 }
8098
8099 // Transform a store of a 128-bit value moved from parts into two stores.
8100 if (SN->isSimple() && ISD::isNormalStore(SN)) {
8101 SDValue LoPart, HiPart;
8102 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
8103 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
8104 SDLoc DL(SN);
8105 SDValue Chain0 = DAG.getStore(
8106 SN->getChain(), DL, HiPart, SN->getBasePtr(), SN->getPointerInfo(),
8107 SN->getBaseAlign(), SN->getMemOperand()->getFlags(), SN->getAAInfo());
8108 SDValue Chain1 = DAG.getStore(
8109 SN->getChain(), DL, LoPart,
8110 DAG.getObjectPtrOffset(DL, SN->getBasePtr(), TypeSize::getFixed(8)),
8111 SN->getPointerInfo().getWithOffset(8), SN->getBaseAlign(),
8112 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8113
8114 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
8115 }
8116 }
8117
8118 // Replicate a reg or immediate with VREP instead of scalar multiply or
8119 // immediate load. It seems best to do this during the first DAGCombine as
8120 // it is straightforward to handle the zero-extend node in the initial
8121 // DAG, and it also avoids having to worry about keeping the new MemVT
8122 // legal (e.g. when extracting an i16 element from a v16i8 vector).
8123 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
8124 isOnlyUsedByStores(Op1, DAG)) {
8125 SDValue Word = SDValue();
8126 EVT WordVT;
8127
8128 // Find a replicated immediate and return it if found in Word and its
8129 // type in WordVT.
8130 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
8131 // Some constants are better handled with a scalar store.
8132 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
8133 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
8134 return;
8135
8136 APInt Val = C->getAPIntValue();
8137 // Truncate Val in case of a truncating store.
8138 if (!llvm::isUIntN(TotBytes * 8, Val.getZExtValue())) {
8139 assert(SN->isTruncatingStore() &&
8140 "Non-truncating store and immediate value does not fit?");
8141 Val = Val.trunc(TotBytes * 8);
8142 }
8143
8144 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, Val.getZExtValue()));
8145 if (VCI.isVectorConstantLegal(Subtarget) &&
8146 VCI.Opcode == SystemZISD::REPLICATE) {
8147 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
8148 WordVT = VCI.VecVT.getScalarType();
8149 }
8150 };
8151
8152 // Find a replicated register and return it if found in Word and its type
8153 // in WordVT.
8154 auto FindReplicatedReg = [&](SDValue MulOp) {
8155 EVT MulVT = MulOp.getValueType();
8156 if (MulOp->getOpcode() == ISD::MUL &&
8157 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
8158 // Find a zero extended value and its type.
8159 SDValue LHS = MulOp->getOperand(0);
8160 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
8161 WordVT = LHS->getOperand(0).getValueType();
8162 else if (LHS->getOpcode() == ISD::AssertZext)
8163 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
8164 else
8165 return;
8166 // Find a replicating constant, e.g. 0x00010001.
8167 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
8168 SystemZVectorConstantInfo VCI(
8169 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
8170 if (VCI.isVectorConstantLegal(Subtarget) &&
8171 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
8172 WordVT == VCI.VecVT.getScalarType())
8173 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
8174 }
8175 }
8176 };
8177
8178 if (isa<BuildVectorSDNode>(Op1) &&
8179 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
8180 SDValue SplatVal = Op1->getOperand(0);
8181 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
8182 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
8183 else
8184 FindReplicatedReg(SplatVal);
8185 } else {
8186 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
8187 FindReplicatedImm(C, MemVT.getStoreSize());
8188 else
8189 FindReplicatedReg(Op1);
8190 }
8191
8192 if (Word != SDValue()) {
8193 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
8194 "Bad type handling");
8195 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
8196 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
8197 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
8198 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
8199 SN->getBasePtr(), SN->getMemOperand());
8200 }
8201 }
8202
8203 return SDValue();
8204}
8205
8206SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
8207 SDNode *N, DAGCombinerInfo &DCI) const {
8208 SelectionDAG &DAG = DCI.DAG;
8209 // Combine element-swap (LOAD) into VLER
8210 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8211 N->getOperand(0).hasOneUse() &&
8212 Subtarget.hasVectorEnhancements2()) {
8213 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
8214 ArrayRef<int> ShuffleMask = SVN->getMask();
8215 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
8216 SDValue Load = N->getOperand(0);
8217 LoadSDNode *LD = cast<LoadSDNode>(Load);
8218
8219 // Create the element-swapping load.
8220 SDValue Ops[] = {
8221 LD->getChain(), // Chain
8222 LD->getBasePtr() // Ptr
8223 };
8224 SDValue ESLoad =
8225 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
8226 DAG.getVTList(LD->getValueType(0), MVT::Other),
8227 Ops, LD->getMemoryVT(), LD->getMemOperand());
8228
8229 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
8230 // by the load dead.
8231 DCI.CombineTo(N, ESLoad);
8232
8233 // Next, combine the load away, we give it a bogus result value but a real
8234 // chain result. The result value is dead because the shuffle is dead.
8235 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
8236
8237 // Return N so it doesn't get rechecked!
8238 return SDValue(N, 0);
8239 }
8240 }
8241
8242 return SDValue();
8243}
8244
8245SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
8246 SDNode *N, DAGCombinerInfo &DCI) const {
8247 SelectionDAG &DAG = DCI.DAG;
8248
8249 if (!Subtarget.hasVector())
8250 return SDValue();
8251
8252 // Look through bitcasts that retain the number of vector elements.
8253 SDValue Op = N->getOperand(0);
8254 if (Op.getOpcode() == ISD::BITCAST &&
8255 Op.getValueType().isVector() &&
8256 Op.getOperand(0).getValueType().isVector() &&
8257 Op.getValueType().getVectorNumElements() ==
8258 Op.getOperand(0).getValueType().getVectorNumElements())
8259 Op = Op.getOperand(0);
8260
8261 // Pull BSWAP out of a vector extraction.
8262 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
8263 EVT VecVT = Op.getValueType();
8264 EVT EltVT = VecVT.getVectorElementType();
8265 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
8266 Op.getOperand(0), N->getOperand(1));
8267 DCI.AddToWorklist(Op.getNode());
8268 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
8269 if (EltVT != N->getValueType(0)) {
8270 DCI.AddToWorklist(Op.getNode());
8271 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
8272 }
8273 return Op;
8274 }
8275
8276 // Try to simplify a vector extraction.
8277 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
8278 SDValue Op0 = N->getOperand(0);
8279 EVT VecVT = Op0.getValueType();
8280 if (canTreatAsByteVector(VecVT))
8281 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
8282 IndexN->getZExtValue(), DCI, false);
8283 }
8284 return SDValue();
8285}
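// For illustration, the BSWAP case above rewrites
//   (i16 (extract_vector_elt (bswap v8i16 %x), 3))
// into
//   (bswap i16 (extract_vector_elt %x, 3))
// so that only the extracted scalar needs to be byte-swapped instead of the
// whole vector.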
8286
8287SDValue SystemZTargetLowering::combineJOIN_DWORDS(
8288 SDNode *N, DAGCombinerInfo &DCI) const {
8289 SelectionDAG &DAG = DCI.DAG;
8290 // (join_dwords X, X) == (replicate X)
8291 if (N->getOperand(0) == N->getOperand(1))
8292 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
8293 N->getOperand(0));
8294 return SDValue();
8295}
8296
 8297 static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
 8298 SDValue Chain1 = N1->getOperand(0);
8299 SDValue Chain2 = N2->getOperand(0);
8300
8301 // Trivial case: both nodes take the same chain.
8302 if (Chain1 == Chain2)
8303 return Chain1;
8304
8305 // FIXME - we could handle more complex cases via TokenFactor,
8306 // assuming we can verify that this would not create a cycle.
8307 return SDValue();
8308}
8309
8310SDValue SystemZTargetLowering::combineFP_ROUND(
8311 SDNode *N, DAGCombinerInfo &DCI) const {
8312
8313 if (!Subtarget.hasVector())
8314 return SDValue();
8315
8316 // (fpround (extract_vector_elt X 0))
8317 // (fpround (extract_vector_elt X 1)) ->
8318 // (extract_vector_elt (VROUND X) 0)
8319 // (extract_vector_elt (VROUND X) 2)
8320 //
8321 // This is a special case since the target doesn't really support v2f32s.
8322 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8323 SelectionDAG &DAG = DCI.DAG;
8324 SDValue Op0 = N->getOperand(OpNo);
8325 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
 8326 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
 8327 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
8328 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8329 Op0.getConstantOperandVal(1) == 0) {
8330 SDValue Vec = Op0.getOperand(0);
8331 for (auto *U : Vec->users()) {
8332 if (U != Op0.getNode() && U->hasOneUse() &&
8333 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8334 U->getOperand(0) == Vec &&
8335 U->getOperand(1).getOpcode() == ISD::Constant &&
8336 U->getConstantOperandVal(1) == 1) {
8337 SDValue OtherRound = SDValue(*U->user_begin(), 0);
8338 if (OtherRound.getOpcode() == N->getOpcode() &&
8339 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
8340 OtherRound.getValueType() == MVT::f32) {
8341 SDValue VRound, Chain;
8342 if (N->isStrictFPOpcode()) {
8343 Chain = MergeInputChains(N, OtherRound.getNode());
8344 if (!Chain)
8345 continue;
8346 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
8347 {MVT::v4f32, MVT::Other}, {Chain, Vec});
8348 Chain = VRound.getValue(1);
8349 } else
8350 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
8351 MVT::v4f32, Vec);
8352 DCI.AddToWorklist(VRound.getNode());
8353 SDValue Extract1 =
8354 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
8355 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
8356 DCI.AddToWorklist(Extract1.getNode());
8357 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
8358 if (Chain)
8359 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
8360 SDValue Extract0 =
8361 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
8362 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8363 if (Chain)
8364 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8365 N->getVTList(), Extract0, Chain);
8366 return Extract0;
8367 }
8368 }
8369 }
8370 }
8371 return SDValue();
8372}
8373
8374SDValue SystemZTargetLowering::combineFP_EXTEND(
8375 SDNode *N, DAGCombinerInfo &DCI) const {
8376
8377 if (!Subtarget.hasVector())
8378 return SDValue();
8379
8380 // (fpextend (extract_vector_elt X 0))
8381 // (fpextend (extract_vector_elt X 2)) ->
8382 // (extract_vector_elt (VEXTEND X) 0)
8383 // (extract_vector_elt (VEXTEND X) 1)
8384 //
8385 // This is a special case since the target doesn't really support v2f32s.
8386 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8387 SelectionDAG &DAG = DCI.DAG;
8388 SDValue Op0 = N->getOperand(OpNo);
8389 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
 8390 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
 8391 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
8392 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8393 Op0.getConstantOperandVal(1) == 0) {
8394 SDValue Vec = Op0.getOperand(0);
8395 for (auto *U : Vec->users()) {
8396 if (U != Op0.getNode() && U->hasOneUse() &&
8397 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8398 U->getOperand(0) == Vec &&
8399 U->getOperand(1).getOpcode() == ISD::Constant &&
8400 U->getConstantOperandVal(1) == 2) {
8401 SDValue OtherExtend = SDValue(*U->user_begin(), 0);
8402 if (OtherExtend.getOpcode() == N->getOpcode() &&
8403 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
8404 OtherExtend.getValueType() == MVT::f64) {
8405 SDValue VExtend, Chain;
8406 if (N->isStrictFPOpcode()) {
8407 Chain = MergeInputChains(N, OtherExtend.getNode());
8408 if (!Chain)
8409 continue;
8410 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
8411 {MVT::v2f64, MVT::Other}, {Chain, Vec});
8412 Chain = VExtend.getValue(1);
8413 } else
8414 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
8415 MVT::v2f64, Vec);
8416 DCI.AddToWorklist(VExtend.getNode());
8417 SDValue Extract1 =
8418 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
8419 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
8420 DCI.AddToWorklist(Extract1.getNode());
8421 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
8422 if (Chain)
8423 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
8424 SDValue Extract0 =
8425 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
8426 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8427 if (Chain)
8428 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8429 N->getVTList(), Extract0, Chain);
8430 return Extract0;
8431 }
8432 }
8433 }
8434 }
8435 return SDValue();
8436}
8437
8438SDValue SystemZTargetLowering::combineINT_TO_FP(
8439 SDNode *N, DAGCombinerInfo &DCI) const {
8440 if (DCI.Level != BeforeLegalizeTypes)
8441 return SDValue();
8442 SelectionDAG &DAG = DCI.DAG;
8443 LLVMContext &Ctx = *DAG.getContext();
8444 unsigned Opcode = N->getOpcode();
8445 EVT OutVT = N->getValueType(0);
8446 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
8447 SDValue Op = N->getOperand(0);
8448 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
8449 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
8450
8451 // Insert an extension before type-legalization to avoid scalarization, e.g.:
8452 // v2f64 = uint_to_fp v2i16
8453 // =>
8454 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
8455 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
8456 OutScalarBits <= 64) {
8457 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
8458 EVT ExtVT = EVT::getVectorVT(
8459 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
8460 unsigned ExtOpcode =
 8461 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
 8462 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
8463 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
8464 }
8465 return SDValue();
8466}
8467
8468SDValue SystemZTargetLowering::combineFCOPYSIGN(
8469 SDNode *N, DAGCombinerInfo &DCI) const {
8470 SelectionDAG &DAG = DCI.DAG;
8471 EVT VT = N->getValueType(0);
8472 SDValue ValOp = N->getOperand(0);
8473 SDValue SignOp = N->getOperand(1);
8474
8475 // Remove the rounding which is not needed.
8476 if (SignOp.getOpcode() == ISD::FP_ROUND) {
8477 SDValue WideOp = SignOp.getOperand(0);
8478 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, ValOp, WideOp);
8479 }
8480
8481 return SDValue();
8482}
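// Example of the rounding removal above: the sign operand of FCOPYSIGN only
// contributes its sign bit, so
//   (fcopysign f32 %x, (fp_round f64 %y))
// is expected to become (fcopysign f32 %x, f64 %y), taking the sign directly
// from the wider value and dropping the FP_ROUND.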
8483
8484SDValue SystemZTargetLowering::combineBSWAP(
8485 SDNode *N, DAGCombinerInfo &DCI) const {
8486 SelectionDAG &DAG = DCI.DAG;
8487 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
8488 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8489 N->getOperand(0).hasOneUse() &&
8490 canLoadStoreByteSwapped(N->getValueType(0))) {
8491 SDValue Load = N->getOperand(0);
8492 LoadSDNode *LD = cast<LoadSDNode>(Load);
8493
8494 // Create the byte-swapping load.
8495 SDValue Ops[] = {
8496 LD->getChain(), // Chain
8497 LD->getBasePtr() // Ptr
8498 };
8499 EVT LoadVT = N->getValueType(0);
8500 if (LoadVT == MVT::i16)
8501 LoadVT = MVT::i32;
8502 SDValue BSLoad =
8503 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
8504 DAG.getVTList(LoadVT, MVT::Other),
8505 Ops, LD->getMemoryVT(), LD->getMemOperand());
8506
8507 // If this is an i16 load, insert the truncate.
8508 SDValue ResVal = BSLoad;
8509 if (N->getValueType(0) == MVT::i16)
8510 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
8511
8512 // First, combine the bswap away. This makes the value produced by the
8513 // load dead.
8514 DCI.CombineTo(N, ResVal);
8515
 8516 // Next, combine the load away; we give it a bogus result value but a real
8517 // chain result. The result value is dead because the bswap is dead.
8518 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
8519
8520 // Return N so it doesn't get rechecked!
8521 return SDValue(N, 0);
8522 }
8523
8524 // Look through bitcasts that retain the number of vector elements.
8525 SDValue Op = N->getOperand(0);
8526 if (Op.getOpcode() == ISD::BITCAST &&
8527 Op.getValueType().isVector() &&
8528 Op.getOperand(0).getValueType().isVector() &&
8529 Op.getValueType().getVectorNumElements() ==
8530 Op.getOperand(0).getValueType().getVectorNumElements())
8531 Op = Op.getOperand(0);
8532
8533 // Push BSWAP into a vector insertion if at least one side then simplifies.
8534 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
8535 SDValue Vec = Op.getOperand(0);
8536 SDValue Elt = Op.getOperand(1);
8537 SDValue Idx = Op.getOperand(2);
8538
 8539 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
 8540 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
 8541 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
 8542 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
8543 (canLoadStoreByteSwapped(N->getValueType(0)) &&
8544 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
8545 EVT VecVT = N->getValueType(0);
8546 EVT EltVT = N->getValueType(0).getVectorElementType();
8547 if (VecVT != Vec.getValueType()) {
8548 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
8549 DCI.AddToWorklist(Vec.getNode());
8550 }
8551 if (EltVT != Elt.getValueType()) {
8552 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
8553 DCI.AddToWorklist(Elt.getNode());
8554 }
8555 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
8556 DCI.AddToWorklist(Vec.getNode());
8557 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
8558 DCI.AddToWorklist(Elt.getNode());
8559 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
8560 Vec, Elt, Idx);
8561 }
8562 }
8563
8564 // Push BSWAP into a vector shuffle if at least one side then simplifies.
8565 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
8566 if (SV && Op.hasOneUse()) {
8567 SDValue Op0 = Op.getOperand(0);
8568 SDValue Op1 = Op.getOperand(1);
8569
 8570 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
 8571 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
 8572 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
 8573 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
8574 EVT VecVT = N->getValueType(0);
8575 if (VecVT != Op0.getValueType()) {
8576 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
8577 DCI.AddToWorklist(Op0.getNode());
8578 }
8579 if (VecVT != Op1.getValueType()) {
8580 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
8581 DCI.AddToWorklist(Op1.getNode());
8582 }
8583 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
8584 DCI.AddToWorklist(Op0.getNode());
8585 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
8586 DCI.AddToWorklist(Op1.getNode());
8587 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
8588 }
8589 }
8590
8591 return SDValue();
8592}
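// Minimal user-level sketch (hypothetical function) of code expected to
// exercise the BSWAP-of-load combine above:
//
//   uint32_t load_swapped(const uint32_t *p) {
//     return __builtin_bswap32(*p);   // expected to select a single LRV
//   }
//
// The i16 form maps to LRVH via the i32 widening above, and vector types use
// VLBR when vector-enhancements-2 is available.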
8593
8594SDValue SystemZTargetLowering::combineSETCC(
8595 SDNode *N, DAGCombinerInfo &DCI) const {
8596 SelectionDAG &DAG = DCI.DAG;
8597 const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
8598 const SDValue LHS = N->getOperand(0);
8599 const SDValue RHS = N->getOperand(1);
8600 bool CmpNull = isNullConstant(RHS);
8601 bool CmpAllOnes = isAllOnesConstant(RHS);
8602 EVT VT = N->getValueType(0);
8603 SDLoc DL(N);
8604
8605 // Match icmp_eq/ne(bitcast(icmp(X,Y)),0/-1) reduction patterns, and
8606 // change the outer compare to a i128 compare. This will normally
8607 // allow the reduction to be recognized in adjustICmp128, and even if
8608 // not, the i128 compare will still generate better code.
8609 if ((CC == ISD::SETNE || CC == ISD::SETEQ) && (CmpNull || CmpAllOnes)) {
 8610 SDValue Src = peekThroughBitcasts(LHS);
 8611 if (Src.getOpcode() == ISD::SETCC &&
8612 Src.getValueType().isFixedLengthVector() &&
8613 Src.getValueType().getScalarType() == MVT::i1) {
8614 EVT CmpVT = Src.getOperand(0).getValueType();
8615 if (CmpVT.getSizeInBits() == 128) {
8616 EVT IntVT = CmpVT.changeVectorElementTypeToInteger();
8617 SDValue LHS =
8618 DAG.getBitcast(MVT::i128, DAG.getSExtOrTrunc(Src, DL, IntVT));
8619 SDValue RHS = CmpNull ? DAG.getConstant(0, DL, MVT::i128)
8620 : DAG.getAllOnesConstant(DL, MVT::i128);
8621 return DAG.getNode(ISD::SETCC, DL, VT, LHS, RHS, N->getOperand(2),
8622 N->getFlags());
8623 }
8624 }
8625 }
8626
8627 return SDValue();
8628}
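// Sketch of the reduction pattern above (vector types are illustrative):
//   %c   = icmp eq <16 x i8> %a, %b         ; <16 x i1>
//   %b16 = bitcast <16 x i1> %c to i16
//   %all = icmp eq i16 %b16, -1             ; "did every lane compare equal?"
// The outer compare is rewritten as an i128 compare of the sign-extended
// mask, which adjustICmp128 can usually fold into a CC-setting vector
// compare instead of materializing the i1 mask.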
8629
8630static std::pair<SDValue, int> findCCUse(const SDValue &Val) {
8631 switch (Val.getOpcode()) {
8632 default:
8633 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8634 case SystemZISD::IPM:
8635 if (Val.getOperand(0).getOpcode() == SystemZISD::CLC ||
8636 Val.getOperand(0).getOpcode() == SystemZISD::STRCMP)
8637 return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ICMP);
8638 return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ANY);
8639 case SystemZISD::SELECT_CCMASK: {
8640 SDValue Op4CCReg = Val.getOperand(4);
8641 if (Op4CCReg.getOpcode() == SystemZISD::ICMP ||
8642 Op4CCReg.getOpcode() == SystemZISD::TM) {
8643 auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0));
8644 if (OpCC != SDValue())
8645 return std::make_pair(OpCC, OpCCValid);
8646 }
8647 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8648 if (!CCValid)
8649 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8650 int CCValidVal = CCValid->getZExtValue();
8651 return std::make_pair(Op4CCReg, CCValidVal);
8652 }
8653 case ISD::ADD:
8654 case ISD::AND:
8655 case ISD::OR:
8656 case ISD::XOR:
8657 case ISD::SHL:
8658 case ISD::SRA:
8659 case ISD::SRL:
8660 auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0));
8661 if (Op0CC != SDValue())
8662 return std::make_pair(Op0CC, Op0CCValid);
8663 return findCCUse(Val.getOperand(1));
8664 }
8665}
8666
8667static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8668 SelectionDAG &DAG);
8669
 8670 static SmallVector<SDValue, 4> simplifyAssumingCCVal(SDValue Val, SDValue CC,
 8671 SelectionDAG &DAG) {
8672 SDLoc DL(Val);
8673 auto Opcode = Val.getOpcode();
8674 switch (Opcode) {
8675 default:
8676 return {};
8677 case ISD::Constant:
8678 return {Val, Val, Val, Val};
8679 case SystemZISD::IPM: {
8680 SDValue IPMOp0 = Val.getOperand(0);
8681 if (IPMOp0 != CC)
8682 return {};
8683 SmallVector<SDValue, 4> ShiftedCCVals;
8684 for (auto CC : {0, 1, 2, 3})
8685 ShiftedCCVals.emplace_back(
8686 DAG.getConstant((CC << SystemZ::IPM_CC), DL, MVT::i32));
8687 return ShiftedCCVals;
8688 }
8689 case SystemZISD::SELECT_CCMASK: {
8690 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
8691 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8692 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
8693 if (!CCValid || !CCMask)
8694 return {};
8695
8696 int CCValidVal = CCValid->getZExtValue();
8697 int CCMaskVal = CCMask->getZExtValue();
 8698 // Prune the search tree early by moving the CC test and combineCCMask ahead
 8699 // of the recursive call to simplifyAssumingCCVal.
8700 SDValue Op4CCReg = Val.getOperand(4);
8701 if (Op4CCReg != CC)
8702 combineCCMask(Op4CCReg, CCValidVal, CCMaskVal, DAG);
8703 if (Op4CCReg != CC)
8704 return {};
8705 const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG);
8706 const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG);
8707 if (TrueSDVals.empty() || FalseSDVals.empty())
8708 return {};
8709 SmallVector<SDValue, 4> MergedSDVals;
8710 for (auto &CCVal : {0, 1, 2, 3})
8711 MergedSDVals.emplace_back(((CCMaskVal & (1 << (3 - CCVal))) != 0)
8712 ? TrueSDVals[CCVal]
8713 : FalseSDVals[CCVal]);
8714 return MergedSDVals;
8715 }
8716 case ISD::ADD:
8717 case ISD::AND:
8718 case ISD::OR:
8719 case ISD::XOR:
8720 case ISD::SRA:
8721 // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA
8722 // would clobber CC).
8723 if (!Val.hasOneUse())
8724 return {};
8725 [[fallthrough]];
8726 case ISD::SHL:
8727 case ISD::SRL:
8728 SDValue Op0 = Val.getOperand(0), Op1 = Val.getOperand(1);
8729 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, CC, DAG);
8730 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, CC, DAG);
8731 if (Op0SDVals.empty() || Op1SDVals.empty())
8732 return {};
8733 SmallVector<SDValue, 4> BinaryOpSDVals;
8734 for (auto CCVal : {0, 1, 2, 3})
8735 BinaryOpSDVals.emplace_back(DAG.getNode(
8736 Opcode, DL, Val.getValueType(), Op0SDVals[CCVal], Op1SDVals[CCVal]));
8737 return BinaryOpSDVals;
8738 }
8739}
8740
8741static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8742 SelectionDAG &DAG) {
8743 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
 8744 // set by the CCReg instruction using the CCValid / CCMask masks. If the
 8745 // CCReg instruction is itself an ICMP / TM testing the condition
8746 // code set by some other instruction, see whether we can directly
8747 // use that condition code.
8748 auto *CCNode = CCReg.getNode();
8749 if (!CCNode)
8750 return false;
8751
8752 if (CCNode->getOpcode() == SystemZISD::TM) {
8753 if (CCValid != SystemZ::CCMASK_TM)
8754 return false;
8755 auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) {
8756 auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
8757 auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
8758 if (!Op0Node || !Op1Node)
8759 return -1;
8760 auto Op0APVal = Op0Node->getAPIntValue();
8761 auto Op1APVal = Op1Node->getAPIntValue();
8762 auto Result = Op0APVal & Op1APVal;
8763 bool AllOnes = Result == Op1APVal;
8764 bool AllZeros = Result == 0;
8765 bool IsLeftMostBitSet = Result[Op1APVal.getActiveBits()] != 0;
8766 return AllZeros ? 0 : AllOnes ? 3 : IsLeftMostBitSet ? 2 : 1;
8767 };
8768 SDValue Op0 = CCNode->getOperand(0);
8769 SDValue Op1 = CCNode->getOperand(1);
8770 auto [Op0CC, Op0CCValid] = findCCUse(Op0);
8771 if (Op0CC == SDValue())
8772 return false;
8773 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, Op0CC, DAG);
8774 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, Op0CC, DAG);
8775 if (Op0SDVals.empty() || Op1SDVals.empty())
8776 return false;
8777 int NewCCMask = 0;
8778 for (auto CC : {0, 1, 2, 3}) {
8779 auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]);
8780 if (CCVal < 0)
8781 return false;
8782 NewCCMask <<= 1;
8783 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8784 }
8785 NewCCMask &= Op0CCValid;
8786 CCReg = Op0CC;
8787 CCMask = NewCCMask;
8788 CCValid = Op0CCValid;
8789 return true;
8790 }
8791 if (CCNode->getOpcode() != SystemZISD::ICMP ||
8792 CCValid != SystemZ::CCMASK_ICMP)
8793 return false;
8794
8795 SDValue CmpOp0 = CCNode->getOperand(0);
8796 SDValue CmpOp1 = CCNode->getOperand(1);
8797 SDValue CmpOp2 = CCNode->getOperand(2);
8798 auto [Op0CC, Op0CCValid] = findCCUse(CmpOp0);
8799 if (Op0CC != SDValue()) {
8800 const auto &&Op0SDVals = simplifyAssumingCCVal(CmpOp0, Op0CC, DAG);
8801 const auto &&Op1SDVals = simplifyAssumingCCVal(CmpOp1, Op0CC, DAG);
8802 if (Op0SDVals.empty() || Op1SDVals.empty())
8803 return false;
8804
8805 auto *CmpType = dyn_cast<ConstantSDNode>(CmpOp2);
8806 auto CmpTypeVal = CmpType->getZExtValue();
8807 const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val,
8808 const SDValue &Op1Val) {
8809 auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
8810 auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
8811 if (!Op0Node || !Op1Node)
8812 return -1;
8813 auto Op0APVal = Op0Node->getAPIntValue();
8814 auto Op1APVal = Op1Node->getAPIntValue();
8815 if (CmpTypeVal == SystemZICMP::SignedOnly)
8816 return Op0APVal == Op1APVal ? 0 : Op0APVal.slt(Op1APVal) ? 1 : 2;
8817 return Op0APVal == Op1APVal ? 0 : Op0APVal.ult(Op1APVal) ? 1 : 2;
8818 };
8819 int NewCCMask = 0;
8820 for (auto CC : {0, 1, 2, 3}) {
8821 auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]);
8822 if (CCVal < 0)
8823 return false;
8824 NewCCMask <<= 1;
8825 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8826 }
8827 NewCCMask &= Op0CCValid;
8828 CCMask = NewCCMask;
8829 CCReg = Op0CC;
8830 CCValid = Op0CCValid;
8831 return true;
8832 }
8833
8834 return false;
8835}
8836
 8837 // Cost parameters for merging conditionals versus splitting them into multiple branches.
 8838 TargetLoweringBase::CondMergingParams
 8839 SystemZTargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
 8840 const Value *Lhs,
8841 const Value *Rhs) const {
8842 const auto isFlagOutOpCC = [](const Value *V) {
8843 using namespace llvm::PatternMatch;
8844 const Value *RHSVal;
8845 const APInt *RHSC;
8846 if (const auto *I = dyn_cast<Instruction>(V)) {
8847 // PatternMatch.h provides concise tree-based pattern match of llvm IR.
8848 if (match(I->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC))) ||
8849 match(I, m_Cmp(m_Value(RHSVal), m_APInt(RHSC)))) {
8850 if (const auto *CB = dyn_cast<CallBase>(RHSVal)) {
8851 if (CB->isInlineAsm()) {
8852 const InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
8853 return IA && IA->getConstraintString().contains("{@cc}");
8854 }
8855 }
8856 }
8857 }
8858 return false;
8859 };
8860 // Pattern (ICmp %asm) or (ICmp (And %asm)).
 8861 // The cost of the longest dependency chain (ICmp, And) is 2, so CostThreshold
 8862 // or BaseCost can be set >= 2. If the cost of the instruction is <= CostThreshold,
 8863 // the conditionals will be merged; otherwise they will be split.
8864 if (isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs))
8865 return {3, 0, -1};
8866 // Default.
8867 return {-1, -1, -1};
8868}
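// Hypothetical IR shape recognized by isFlagOutOpCC above:
//   %cc  = call i32 asm "...", "={@cc},..."(...)
//   %t   = and i32 %cc, 3
//   %cmp = icmp eq i32 %t, 0
// When both branch operands look like this, returning a cost of 3 (above the
// two-instruction dependency chain) is intended to keep the conditionals
// merged into a single branch rather than splitting them.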
8869
8870SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
8871 DAGCombinerInfo &DCI) const {
8872 SelectionDAG &DAG = DCI.DAG;
8873
8874 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
8875 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
8876 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
8877 if (!CCValid || !CCMask)
8878 return SDValue();
8879
8880 int CCValidVal = CCValid->getZExtValue();
8881 int CCMaskVal = CCMask->getZExtValue();
8882 SDValue Chain = N->getOperand(0);
8883 SDValue CCReg = N->getOperand(4);
 8884 // If combineCCMask was able to merge or simplify ccvalid or ccmask, re-emit
8885 // the modified BR_CCMASK with the new values.
8886 // In order to avoid conditional branches with full or empty cc masks, do not
8887 // do this if ccmask is 0 or equal to ccvalid.
8888 if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG) && CCMaskVal != 0 &&
8889 CCMaskVal != CCValidVal)
8890 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
8891 Chain,
8892 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
8893 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
8894 N->getOperand(3), CCReg);
8895 return SDValue();
8896}
8897
8898SDValue SystemZTargetLowering::combineSELECT_CCMASK(
8899 SDNode *N, DAGCombinerInfo &DCI) const {
8900 SelectionDAG &DAG = DCI.DAG;
8901
8902 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
8903 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
8904 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
8905 if (!CCValid || !CCMask)
8906 return SDValue();
8907
8908 int CCValidVal = CCValid->getZExtValue();
8909 int CCMaskVal = CCMask->getZExtValue();
8910 SDValue CCReg = N->getOperand(4);
8911
8912 bool IsCombinedCCReg = combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG);
8913
8914 // Populate SDVals vector for each condition code ccval for given Val, which
8915 // can again be another nested select_ccmask with the same CC.
8916 const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) {
8917 if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) {
 8918 SmallVector<SDValue, 4> Res;
 8919 if (Val.getOperand(4) != CCReg)
8920 return SmallVector<SDValue, 4>{};
8921 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
8922 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
8923 if (!CCMask)
8924 return SmallVector<SDValue, 4>{};
8925
8926 int CCMaskVal = CCMask->getZExtValue();
8927 for (auto &CC : {0, 1, 2, 3})
8928 Res.emplace_back(((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal
8929 : FalseVal);
8930 return Res;
8931 }
8932 return SmallVector<SDValue, 4>{Val, Val, Val, Val};
8933 };
 8934 // Attempt to optimize TrueVal/FalseVal in the outermost select_ccmask, either
 8935 // with the CCReg found by combineCCMask or with the original CCReg.
8936 SDValue TrueVal = N->getOperand(0);
8937 SDValue FalseVal = N->getOperand(1);
8938 auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CCReg, DAG);
8939 auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CCReg, DAG);
8940 // TrueSDVals/FalseSDVals might be empty in case of non-constant
8941 // TrueVal/FalseVal for select_ccmask, which can not be optimized further.
8942 if (TrueSDVals.empty())
8943 TrueSDVals = constructCCSDValsFromSELECT(TrueVal);
8944 if (FalseSDVals.empty())
8945 FalseSDVals = constructCCSDValsFromSELECT(FalseVal);
8946 if (!TrueSDVals.empty() && !FalseSDVals.empty()) {
8947 SmallSet<SDValue, 4> MergedSDValsSet;
 8948 // Ignore CC values outside CCValid.
8949 for (auto CC : {0, 1, 2, 3}) {
8950 if ((CCValidVal & ((1 << (3 - CC)))) != 0)
8951 MergedSDValsSet.insert(((CCMaskVal & (1 << (3 - CC))) != 0)
8952 ? TrueSDVals[CC]
8953 : FalseSDVals[CC]);
8954 }
8955 if (MergedSDValsSet.size() == 1)
8956 return *MergedSDValsSet.begin();
8957 if (MergedSDValsSet.size() == 2) {
8958 auto BeginIt = MergedSDValsSet.begin();
 8959 SDValue NewTrueVal = *BeginIt, NewFalseVal = *std::next(BeginIt);
8960 if (NewTrueVal == FalseVal || NewFalseVal == TrueVal)
8961 std::swap(NewTrueVal, NewFalseVal);
8962 int NewCCMask = 0;
8963 for (auto CC : {0, 1, 2, 3}) {
8964 NewCCMask <<= 1;
8965 NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0)
8966 ? (TrueSDVals[CC] == NewTrueVal)
8967 : (FalseSDVals[CC] == NewTrueVal);
8968 }
8969 CCMaskVal = NewCCMask;
8970 CCMaskVal &= CCValidVal;
8971 TrueVal = NewTrueVal;
8972 FalseVal = NewFalseVal;
8973 IsCombinedCCReg = true;
8974 }
8975 }
8976 // If the condition is trivially false or trivially true after
8977 // combineCCMask, just collapse this SELECT_CCMASK to the indicated value
8978 // (possibly modified by constructCCSDValsFromSELECT).
8979 if (CCMaskVal == 0)
8980 return FalseVal;
8981 if (CCMaskVal == CCValidVal)
8982 return TrueVal;
8983
8984 if (IsCombinedCCReg)
8985 return DAG.getNode(
8986 SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), TrueVal,
8987 FalseVal, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
8988 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg);
8989
8990 return SDValue();
8991}
8992
8993SDValue SystemZTargetLowering::combineGET_CCMASK(
8994 SDNode *N, DAGCombinerInfo &DCI) const {
8995
8996 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
8997 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
8998 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
8999 if (!CCValid || !CCMask)
9000 return SDValue();
9001 int CCValidVal = CCValid->getZExtValue();
9002 int CCMaskVal = CCMask->getZExtValue();
9003
9004 SDValue Select = N->getOperand(0);
9005 if (Select->getOpcode() == ISD::TRUNCATE)
9006 Select = Select->getOperand(0);
9007 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
9008 return SDValue();
9009
9010 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
9011 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
9012 if (!SelectCCValid || !SelectCCMask)
9013 return SDValue();
9014 int SelectCCValidVal = SelectCCValid->getZExtValue();
9015 int SelectCCMaskVal = SelectCCMask->getZExtValue();
9016
9017 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
9018 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
9019 if (!TrueVal || !FalseVal)
9020 return SDValue();
9021 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
9022 ;
9023 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
9024 SelectCCMaskVal ^= SelectCCValidVal;
9025 else
9026 return SDValue();
9027
9028 if (SelectCCValidVal & ~CCValidVal)
9029 return SDValue();
9030 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
9031 return SDValue();
9032
9033 return Select->getOperand(4);
9034}
9035
9036SDValue SystemZTargetLowering::combineIntDIVREM(
9037 SDNode *N, DAGCombinerInfo &DCI) const {
9038 SelectionDAG &DAG = DCI.DAG;
9039 EVT VT = N->getValueType(0);
9040 // In the case where the divisor is a vector of constants a cheaper
9041 // sequence of instructions can replace the divide. BuildSDIV is called to
9042 // do this during DAG combining, but it only succeeds when it can build a
9043 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
9044 // since it is not Legal but Custom it can only happen before
 9045 // legalization. Therefore we must scalarize this early, before the first DAG
 9046 // combine run. For widened vectors, this is already the result of type legalization.
9047 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
9048 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
9049 return DAG.UnrollVectorOp(N);
9050 return SDValue();
9051}
9052
9053
9054// Transform a right shift of a multiply-and-add into a multiply-and-add-high.
9055// This is closely modeled after the common-code combineShiftToMULH.
9056SDValue SystemZTargetLowering::combineShiftToMulAddHigh(
9057 SDNode *N, DAGCombinerInfo &DCI) const {
9058 SelectionDAG &DAG = DCI.DAG;
9059 SDLoc DL(N);
9060
9061 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
9062 "SRL or SRA node is required here!");
9063
9064 if (!Subtarget.hasVector())
9065 return SDValue();
9066
9067 // Check the shift amount. Proceed with the transformation if the shift
9068 // amount is constant.
9069 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
9070 if (!ShiftAmtSrc)
9071 return SDValue();
9072
9073 // The operation feeding into the shift must be an add.
9074 SDValue ShiftOperand = N->getOperand(0);
9075 if (ShiftOperand.getOpcode() != ISD::ADD)
9076 return SDValue();
9077
9078 // One operand of the add must be a multiply.
9079 SDValue MulOp = ShiftOperand.getOperand(0);
9080 SDValue AddOp = ShiftOperand.getOperand(1);
9081 if (MulOp.getOpcode() != ISD::MUL) {
9082 if (AddOp.getOpcode() != ISD::MUL)
9083 return SDValue();
9084 std::swap(MulOp, AddOp);
9085 }
9086
9087 // All operands must be equivalent extend nodes.
9088 SDValue LeftOp = MulOp.getOperand(0);
9089 SDValue RightOp = MulOp.getOperand(1);
9090
9091 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
9092 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
9093
9094 if (!IsSignExt && !IsZeroExt)
9095 return SDValue();
9096
9097 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
9098 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
9099
9100 SDValue MulhRightOp;
9101 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
9102 unsigned ActiveBits = IsSignExt
9103 ? Constant->getAPIntValue().getSignificantBits()
9104 : Constant->getAPIntValue().getActiveBits();
9105 if (ActiveBits > NarrowVTSize)
9106 return SDValue();
9107 MulhRightOp = DAG.getConstant(
9108 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9109 NarrowVT);
9110 } else {
9111 if (LeftOp.getOpcode() != RightOp.getOpcode())
9112 return SDValue();
9113 // Check that the two extend nodes are the same type.
9114 if (NarrowVT != RightOp.getOperand(0).getValueType())
9115 return SDValue();
9116 MulhRightOp = RightOp.getOperand(0);
9117 }
9118
9119 SDValue MulhAddOp;
9120 if (ConstantSDNode *Constant = isConstOrConstSplat(AddOp)) {
9121 unsigned ActiveBits = IsSignExt
9122 ? Constant->getAPIntValue().getSignificantBits()
9123 : Constant->getAPIntValue().getActiveBits();
9124 if (ActiveBits > NarrowVTSize)
9125 return SDValue();
9126 MulhAddOp = DAG.getConstant(
9127 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9128 NarrowVT);
9129 } else {
9130 if (LeftOp.getOpcode() != AddOp.getOpcode())
9131 return SDValue();
9132 // Check that the two extend nodes are the same type.
9133 if (NarrowVT != AddOp.getOperand(0).getValueType())
9134 return SDValue();
9135 MulhAddOp = AddOp.getOperand(0);
9136 }
9137
9138 EVT WideVT = LeftOp.getValueType();
9139 // Proceed with the transformation if the wide types match.
9140 assert((WideVT == RightOp.getValueType()) &&
9141 "Cannot have a multiply node with two different operand types.");
9142 assert((WideVT == AddOp.getValueType()) &&
9143 "Cannot have an add node with two different operand types.");
9144
9145 // Proceed with the transformation if the wide type is twice as large
9146 // as the narrow type.
9147 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
9148 return SDValue();
9149
9150 // Check the shift amount with the narrow type size.
9151 // Proceed with the transformation if the shift amount is the width
9152 // of the narrow type.
9153 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
9154 if (ShiftAmt != NarrowVTSize)
9155 return SDValue();
9156
9157 // Proceed if we support the multiply-and-add-high operation.
9158 if (!(NarrowVT == MVT::v16i8 || NarrowVT == MVT::v8i16 ||
9159 NarrowVT == MVT::v4i32 ||
9160 (Subtarget.hasVectorEnhancements3() &&
9161 (NarrowVT == MVT::v2i64 || NarrowVT == MVT::i128))))
9162 return SDValue();
9163
9164 // Emit the VMAH (signed) or VMALH (unsigned) operation.
9165 SDValue Result = DAG.getNode(IsSignExt ? SystemZISD::VMAH : SystemZISD::VMALH,
9166 DL, NarrowVT, LeftOp.getOperand(0),
9167 MulhRightOp, MulhAddOp);
9168 bool IsSigned = N->getOpcode() == ISD::SRA;
9169 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
9170}
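// Illustrative DAG shape for the transformation above (operands invented):
// with a, b, c of type v8i16 and the arithmetic done in v8i32,
//   (srl (add (mul (zext a), (zext b)), (zext c)), 16)
// keeps only the high halfword of each lane, so it is expected to be emitted
// as VMALH(a, b, c) (or VMAH for the sign-extended form) and then extended
// back to the wide type.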
9171
9172// Op is an operand of a multiplication. Check whether this can be folded
9173// into an even/odd widening operation; if so, return the opcode to be used
9174// and update Op to the appropriate sub-operand. Note that the caller must
9175// verify that *both* operands of the multiplication support the operation.
 9176 static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG,
 9177 const SystemZSubtarget &Subtarget,
9178 SDValue &Op) {
9179 EVT VT = Op.getValueType();
9180
9181 // Check for (sign/zero_extend_vector_inreg (vector_shuffle)) corresponding
9182 // to selecting the even or odd vector elements.
9183 if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
9184 (Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9185 Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)) {
9186 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
9187 unsigned NumElts = VT.getVectorNumElements();
9188 Op = Op.getOperand(0);
9189 if (Op.getValueType().getVectorNumElements() == 2 * NumElts &&
9190 Op.getOpcode() == ISD::VECTOR_SHUFFLE) {
 9191 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
 9192 ArrayRef<int> ShuffleMask = SVN->getMask();
9193 bool CanUseEven = true, CanUseOdd = true;
9194 for (unsigned Elt = 0; Elt < NumElts; Elt++) {
9195 if (ShuffleMask[Elt] == -1)
9196 continue;
9197 if (unsigned(ShuffleMask[Elt]) != 2 * Elt)
9198 CanUseEven = false;
9199 if (unsigned(ShuffleMask[Elt]) != 2 * Elt + 1)
9200 CanUseOdd = false;
9201 }
9202 Op = Op.getOperand(0);
9203 if (CanUseEven)
9204 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9205 if (CanUseOdd)
9206 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9207 }
9208 }
9209
9210 // For z17, we can also support the v2i64->i128 case, which looks like
9211 // (sign/zero_extend (extract_vector_elt X 0/1))
9212 if (VT == MVT::i128 && Subtarget.hasVectorEnhancements3() &&
9213 (Op.getOpcode() == ISD::SIGN_EXTEND ||
9214 Op.getOpcode() == ISD::ZERO_EXTEND)) {
9215 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND;
9216 Op = Op.getOperand(0);
9217 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9218 Op.getOperand(0).getValueType() == MVT::v2i64 &&
9219 Op.getOperand(1).getOpcode() == ISD::Constant) {
9220 unsigned Elem = Op.getConstantOperandVal(1);
9221 Op = Op.getOperand(0);
9222 if (Elem == 0)
9223 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9224 if (Elem == 1)
9225 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9226 }
9227 }
9228
9229 return 0;
9230}
9231
9232SDValue SystemZTargetLowering::combineMUL(
9233 SDNode *N, DAGCombinerInfo &DCI) const {
9234 SelectionDAG &DAG = DCI.DAG;
9235
9236 // Detect even/odd widening multiplication.
9237 SDValue Op0 = N->getOperand(0);
9238 SDValue Op1 = N->getOperand(1);
9239 unsigned OpcodeCand0 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op0);
9240 unsigned OpcodeCand1 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op1);
9241 if (OpcodeCand0 && OpcodeCand0 == OpcodeCand1)
9242 return DAG.getNode(OpcodeCand0, SDLoc(N), N->getValueType(0), Op0, Op1);
9243
9244 return SDValue();
9245}
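// Sketch of the even/odd widening-multiply case detected above: a v2i64
// multiply whose operands are both
//   (sign_extend_vector_inreg (vector_shuffle <0, 2, undef, undef> %x, undef))
// i.e. the even v4i32 elements of %x sign-extended to 64 bits, is emitted as
// VME (VECTOR MULTIPLY EVEN); odd elements and zero extension map to VMO,
// VMLE and VMLO in the same way.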
9246
9247SDValue SystemZTargetLowering::combineINTRINSIC(
9248 SDNode *N, DAGCombinerInfo &DCI) const {
9249 SelectionDAG &DAG = DCI.DAG;
9250
9251 unsigned Id = N->getConstantOperandVal(1);
9252 switch (Id) {
9253 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
9254 // or larger is simply a vector load.
9255 case Intrinsic::s390_vll:
9256 case Intrinsic::s390_vlrl:
9257 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
9258 if (C->getZExtValue() >= 15)
9259 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
9260 N->getOperand(3), MachinePointerInfo());
9261 break;
9262 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
9263 case Intrinsic::s390_vstl:
9264 case Intrinsic::s390_vstrl:
9265 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
9266 if (C->getZExtValue() >= 15)
9267 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
9268 N->getOperand(4), MachinePointerInfo());
9269 break;
9270 }
9271
9272 return SDValue();
9273}
9274
9275SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
9276 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
9277 return N->getOperand(0);
9278 return N;
9279}
9280
 9281 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
 9282 DAGCombinerInfo &DCI) const {
9283 switch(N->getOpcode()) {
9284 default: break;
9285 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
9286 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
9287 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
9288 case SystemZISD::MERGE_HIGH:
9289 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
9290 case ISD::LOAD: return combineLOAD(N, DCI);
9291 case ISD::STORE: return combineSTORE(N, DCI);
9292 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
9293 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
9294 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
 9295 case ISD::STRICT_FP_ROUND:
 9296 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
 9297 case ISD::STRICT_FP_EXTEND:
 9298 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
9299 case ISD::SINT_TO_FP:
9300 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
9301 case ISD::FCOPYSIGN: return combineFCOPYSIGN(N, DCI);
9302 case ISD::BSWAP: return combineBSWAP(N, DCI);
9303 case ISD::SETCC: return combineSETCC(N, DCI);
9304 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
9305 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
9306 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
9307 case ISD::SRL:
9308 case ISD::SRA: return combineShiftToMulAddHigh(N, DCI);
9309 case ISD::MUL: return combineMUL(N, DCI);
9310 case ISD::SDIV:
9311 case ISD::UDIV:
9312 case ISD::SREM:
9313 case ISD::UREM: return combineIntDIVREM(N, DCI);
 9314 case ISD::INTRINSIC_W_CHAIN:
 9315 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
9316 }
9317
9318 return SDValue();
9319}
9320
9321// Return the demanded elements for the OpNo source operand of Op. DemandedElts
9322// are for Op.
9323static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
9324 unsigned OpNo) {
9325 EVT VT = Op.getValueType();
9326 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
9327 APInt SrcDemE;
9328 unsigned Opcode = Op.getOpcode();
9329 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9330 unsigned Id = Op.getConstantOperandVal(0);
9331 switch (Id) {
9332 case Intrinsic::s390_vpksh: // PACKS
9333 case Intrinsic::s390_vpksf:
9334 case Intrinsic::s390_vpksg:
9335 case Intrinsic::s390_vpkshs: // PACKS_CC
9336 case Intrinsic::s390_vpksfs:
9337 case Intrinsic::s390_vpksgs:
9338 case Intrinsic::s390_vpklsh: // PACKLS
9339 case Intrinsic::s390_vpklsf:
9340 case Intrinsic::s390_vpklsg:
9341 case Intrinsic::s390_vpklshs: // PACKLS_CC
9342 case Intrinsic::s390_vpklsfs:
9343 case Intrinsic::s390_vpklsgs:
9344 // VECTOR PACK truncates the elements of two source vectors into one.
9345 SrcDemE = DemandedElts;
9346 if (OpNo == 2)
9347 SrcDemE.lshrInPlace(NumElts / 2);
9348 SrcDemE = SrcDemE.trunc(NumElts / 2);
9349 break;
9350 // VECTOR UNPACK extends half the elements of the source vector.
9351 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9352 case Intrinsic::s390_vuphh:
9353 case Intrinsic::s390_vuphf:
9354 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9355 case Intrinsic::s390_vuplhh:
9356 case Intrinsic::s390_vuplhf:
9357 SrcDemE = APInt(NumElts * 2, 0);
9358 SrcDemE.insertBits(DemandedElts, 0);
9359 break;
9360 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9361 case Intrinsic::s390_vuplhw:
9362 case Intrinsic::s390_vuplf:
9363 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9364 case Intrinsic::s390_vupllh:
9365 case Intrinsic::s390_vupllf:
9366 SrcDemE = APInt(NumElts * 2, 0);
9367 SrcDemE.insertBits(DemandedElts, NumElts);
9368 break;
9369 case Intrinsic::s390_vpdi: {
9370 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
9371 SrcDemE = APInt(NumElts, 0);
9372 if (!DemandedElts[OpNo - 1])
9373 break;
9374 unsigned Mask = Op.getConstantOperandVal(3);
9375 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
9376 // Demand input element 0 or 1, given by the mask bit value.
9377 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
9378 break;
9379 }
9380 case Intrinsic::s390_vsldb: {
9381 // VECTOR SHIFT LEFT DOUBLE BY BYTE
9382 assert(VT == MVT::v16i8 && "Unexpected type.");
9383 unsigned FirstIdx = Op.getConstantOperandVal(3);
9384 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
9385 unsigned NumSrc0Els = 16 - FirstIdx;
9386 SrcDemE = APInt(NumElts, 0);
9387 if (OpNo == 1) {
9388 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
9389 SrcDemE.insertBits(DemEls, FirstIdx);
9390 } else {
9391 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
9392 SrcDemE.insertBits(DemEls, 0);
9393 }
9394 break;
9395 }
9396 case Intrinsic::s390_vperm:
9397 SrcDemE = APInt::getAllOnes(NumElts);
9398 break;
9399 default:
9400 llvm_unreachable("Unhandled intrinsic.");
9401 break;
9402 }
9403 } else {
9404 switch (Opcode) {
9405 case SystemZISD::JOIN_DWORDS:
9406 // Scalar operand.
9407 SrcDemE = APInt(1, 1);
9408 break;
9409 case SystemZISD::SELECT_CCMASK:
9410 SrcDemE = DemandedElts;
9411 break;
9412 default:
9413 llvm_unreachable("Unhandled opcode.");
9414 break;
9415 }
9416 }
9417 return SrcDemE;
9418}
9419
9420static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
9421 const APInt &DemandedElts,
9422 const SelectionDAG &DAG, unsigned Depth,
9423 unsigned OpNo) {
9424 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9425 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9426 KnownBits LHSKnown =
9427 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9428 KnownBits RHSKnown =
9429 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9430 Known = LHSKnown.intersectWith(RHSKnown);
9431}
9432
9433void
 9434 SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
 9435 KnownBits &Known,
9436 const APInt &DemandedElts,
9437 const SelectionDAG &DAG,
9438 unsigned Depth) const {
9439 Known.resetAll();
9440
9441 // Intrinsic CC result is returned in the two low bits.
9442 unsigned Tmp0, Tmp1; // not used
9443 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, Tmp0, Tmp1)) {
9444 Known.Zero.setBitsFrom(2);
9445 return;
9446 }
9447 EVT VT = Op.getValueType();
9448 if (Op.getResNo() != 0 || VT == MVT::Untyped)
9449 return;
9450 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
9451 "KnownBits does not match VT in bitwidth");
9452 assert ((!VT.isVector() ||
9453 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
9454 "DemandedElts does not match VT number of elements");
9455 unsigned BitWidth = Known.getBitWidth();
9456 unsigned Opcode = Op.getOpcode();
9457 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9458 bool IsLogical = false;
9459 unsigned Id = Op.getConstantOperandVal(0);
9460 switch (Id) {
9461 case Intrinsic::s390_vpksh: // PACKS
9462 case Intrinsic::s390_vpksf:
9463 case Intrinsic::s390_vpksg:
9464 case Intrinsic::s390_vpkshs: // PACKS_CC
9465 case Intrinsic::s390_vpksfs:
9466 case Intrinsic::s390_vpksgs:
9467 case Intrinsic::s390_vpklsh: // PACKLS
9468 case Intrinsic::s390_vpklsf:
9469 case Intrinsic::s390_vpklsg:
9470 case Intrinsic::s390_vpklshs: // PACKLS_CC
9471 case Intrinsic::s390_vpklsfs:
9472 case Intrinsic::s390_vpklsgs:
9473 case Intrinsic::s390_vpdi:
9474 case Intrinsic::s390_vsldb:
9475 case Intrinsic::s390_vperm:
9476 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
9477 break;
9478 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9479 case Intrinsic::s390_vuplhh:
9480 case Intrinsic::s390_vuplhf:
9481 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9482 case Intrinsic::s390_vupllh:
9483 case Intrinsic::s390_vupllf:
9484 IsLogical = true;
9485 [[fallthrough]];
9486 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9487 case Intrinsic::s390_vuphh:
9488 case Intrinsic::s390_vuphf:
9489 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9490 case Intrinsic::s390_vuplhw:
9491 case Intrinsic::s390_vuplf: {
9492 SDValue SrcOp = Op.getOperand(1);
9493 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
9494 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
9495 if (IsLogical) {
9496 Known = Known.zext(BitWidth);
9497 } else
9498 Known = Known.sext(BitWidth);
9499 break;
9500 }
9501 default:
9502 break;
9503 }
9504 } else {
9505 switch (Opcode) {
9506 case SystemZISD::JOIN_DWORDS:
9507 case SystemZISD::SELECT_CCMASK:
9508 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
9509 break;
9510 case SystemZISD::REPLICATE: {
9511 SDValue SrcOp = Op.getOperand(0);
9512 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
 9513 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
 9514 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
9515 break;
9516 }
9517 default:
9518 break;
9519 }
9520 }
9521
9522 // Known has the width of the source operand(s). Adjust if needed to match
9523 // the passed bitwidth.
9524 if (Known.getBitWidth() != BitWidth)
9525 Known = Known.anyextOrTrunc(BitWidth);
9526}
9527
9528static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
9529 const SelectionDAG &DAG, unsigned Depth,
9530 unsigned OpNo) {
9531 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9532 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9533 if (LHS == 1) return 1; // Early out.
9534 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9535 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9536 if (RHS == 1) return 1; // Early out.
9537 unsigned Common = std::min(LHS, RHS);
9538 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
9539 EVT VT = Op.getValueType();
9540 unsigned VTBits = VT.getScalarSizeInBits();
9541 if (SrcBitWidth > VTBits) { // PACK
9542 unsigned SrcExtraBits = SrcBitWidth - VTBits;
9543 if (Common > SrcExtraBits)
9544 return (Common - SrcExtraBits);
9545 return 1;
9546 }
9547 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
9548 return Common;
9549}
9550
9551unsigned
 9552 SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
 9553 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
9554 unsigned Depth) const {
9555 if (Op.getResNo() != 0)
9556 return 1;
9557 unsigned Opcode = Op.getOpcode();
9558 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9559 unsigned Id = Op.getConstantOperandVal(0);
9560 switch (Id) {
9561 case Intrinsic::s390_vpksh: // PACKS
9562 case Intrinsic::s390_vpksf:
9563 case Intrinsic::s390_vpksg:
9564 case Intrinsic::s390_vpkshs: // PACKS_CC
9565 case Intrinsic::s390_vpksfs:
9566 case Intrinsic::s390_vpksgs:
9567 case Intrinsic::s390_vpklsh: // PACKLS
9568 case Intrinsic::s390_vpklsf:
9569 case Intrinsic::s390_vpklsg:
9570 case Intrinsic::s390_vpklshs: // PACKLS_CC
9571 case Intrinsic::s390_vpklsfs:
9572 case Intrinsic::s390_vpklsgs:
9573 case Intrinsic::s390_vpdi:
9574 case Intrinsic::s390_vsldb:
9575 case Intrinsic::s390_vperm:
9576 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
9577 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9578 case Intrinsic::s390_vuphh:
9579 case Intrinsic::s390_vuphf:
9580 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9581 case Intrinsic::s390_vuplhw:
9582 case Intrinsic::s390_vuplf: {
9583 SDValue PackedOp = Op.getOperand(1);
9584 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
9585 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
9586 EVT VT = Op.getValueType();
9587 unsigned VTBits = VT.getScalarSizeInBits();
9588 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
9589 return Tmp;
9590 }
9591 default:
9592 break;
9593 }
9594 } else {
9595 switch (Opcode) {
9596 case SystemZISD::SELECT_CCMASK:
9597 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
9598 default:
9599 break;
9600 }
9601 }
9602
9603 return 1;
9604}
9605
 9606 bool SystemZTargetLowering::
 9607 isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op,
 9608 const APInt &DemandedElts, const SelectionDAG &DAG,
9609 bool PoisonOnly, unsigned Depth) const {
9610 switch (Op->getOpcode()) {
9611 case SystemZISD::PCREL_WRAPPER:
9612 case SystemZISD::PCREL_OFFSET:
9613 return true;
9614 }
9615 return false;
9616}
9617
9618unsigned
 9619 SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
 9620 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9621 unsigned StackAlign = TFI->getStackAlignment();
9622 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
9623 "Unexpected stack alignment");
9624 // The default stack probe size is 4096 if the function has no
9625 // stack-probe-size attribute.
9626 unsigned StackProbeSize =
9627 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
9628 // Round down to the stack alignment.
9629 StackProbeSize &= ~(StackAlign - 1);
9630 return StackProbeSize ? StackProbeSize : StackAlign;
9631}
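// Worked example (attribute value assumed): with "stack-probe-size"="16005"
// and an 8-byte stack alignment, 16005 & ~7 == 16000, so the prologue probes
// every 16000 bytes; a requested size smaller than the alignment rounds down
// to zero and the code falls back to StackAlign itself.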
9632
9633//===----------------------------------------------------------------------===//
9634// Custom insertion
9635//===----------------------------------------------------------------------===//
9636
9637// Force base value Base into a register before MI. Return the register.
 9638 static Register forceReg(MachineInstr &MI, MachineOperand &Base,
 9639 const SystemZInstrInfo *TII) {
9640 MachineBasicBlock *MBB = MI.getParent();
 9641 MachineFunction &MF = *MBB->getParent();
 9642 MachineRegisterInfo &MRI = MF.getRegInfo();
9643
9644 if (Base.isReg()) {
9645 // Copy Base into a new virtual register to help register coalescing in
9646 // cases with multiple uses.
9647 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9648 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
9649 .add(Base);
9650 return Reg;
9651 }
9652
9653 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9654 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
9655 .add(Base)
9656 .addImm(0)
9657 .addReg(0);
9658 return Reg;
9659}
9660
9661// The CC operand of MI might be missing a kill marker because there
9662// were multiple uses of CC, and ISel didn't know which to mark.
9663// Figure out whether MI should have had a kill marker.
 9664 static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
 9665 // Scan forward through BB for a use/def of CC.
 9666 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
 9667 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
9668 const MachineInstr &MI = *miI;
9669 if (MI.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
9670 return false;
9671 if (MI.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
9672 break; // Should have kill-flag - update below.
9673 }
9674
9675 // If we hit the end of the block, check whether CC is live into a
9676 // successor.
9677 if (miI == MBB->end()) {
9678 for (const MachineBasicBlock *Succ : MBB->successors())
9679 if (Succ->isLiveIn(SystemZ::CC))
9680 return false;
9681 }
9682
9683 return true;
9684}
9685
9686// Return true if it is OK for this Select pseudo-opcode to be cascaded
9687// together with other Select pseudo-opcodes into a single basic-block with
9688// a conditional jump around it.
 9689 static bool isSelectPseudo(MachineInstr &MI) {
 9690 switch (MI.getOpcode()) {
9691 case SystemZ::Select32:
9692 case SystemZ::Select64:
9693 case SystemZ::Select128:
9694 case SystemZ::SelectF32:
9695 case SystemZ::SelectF64:
9696 case SystemZ::SelectF128:
9697 case SystemZ::SelectVR32:
9698 case SystemZ::SelectVR64:
9699 case SystemZ::SelectVR128:
9700 return true;
9701
9702 default:
9703 return false;
9704 }
9705}
9706
9707// Helper function, which inserts PHI functions into SinkMBB:
9708// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
9709// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
 9710 static void createPHIsForSelects(SmallVectorImpl<MachineInstr *> &Selects,
 9711 MachineBasicBlock *TrueMBB,
9712 MachineBasicBlock *FalseMBB,
9713 MachineBasicBlock *SinkMBB) {
 9714 MachineFunction *MF = TrueMBB->getParent();
 9715 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
9716
9717 MachineInstr *FirstMI = Selects.front();
9718 unsigned CCValid = FirstMI->getOperand(3).getImm();
9719 unsigned CCMask = FirstMI->getOperand(4).getImm();
9720
9721 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
9722
9723 // As we are creating the PHIs, we have to be careful if there is more than
9724 // one. Later Selects may reference the results of earlier Selects, but later
9725 // PHIs have to reference the individual true/false inputs from earlier PHIs.
9726 // That also means that PHI construction must work forward from earlier to
9727 // later, and that the code must maintain a mapping from earlier PHI's
9728 // destination registers, and the registers that went into the PHI.
 9729 DenseMap<Register, std::pair<Register, Register>> RegRewriteTable;
 9730
9731 for (auto *MI : Selects) {
9732 Register DestReg = MI->getOperand(0).getReg();
9733 Register TrueReg = MI->getOperand(1).getReg();
9734 Register FalseReg = MI->getOperand(2).getReg();
9735
9736 // If this Select we are generating is the opposite condition from
9737 // the jump we generated, then we have to swap the operands for the
9738 // PHI that is going to be generated.
9739 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
9740 std::swap(TrueReg, FalseReg);
9741
9742 if (auto It = RegRewriteTable.find(TrueReg); It != RegRewriteTable.end())
9743 TrueReg = It->second.first;
9744
9745 if (auto It = RegRewriteTable.find(FalseReg); It != RegRewriteTable.end())
9746 FalseReg = It->second.second;
9747
9748 DebugLoc DL = MI->getDebugLoc();
9749 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
9750 .addReg(TrueReg).addMBB(TrueMBB)
9751 .addReg(FalseReg).addMBB(FalseMBB);
9752
9753 // Add this PHI to the rewrite table.
9754 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
9755 }
9756
9757 MF->getProperties().resetNoPHIs();
9758}
9759
 9760 MachineBasicBlock *
 9761 SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
9762 MachineBasicBlock *BB) const {
9763 MachineFunction &MF = *BB->getParent();
9764 MachineFrameInfo &MFI = MF.getFrameInfo();
9765 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
9766 assert(TFL->hasReservedCallFrame(MF) &&
9767 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
9768 (void)TFL;
9769 // Get the MaxCallFrameSize value and erase MI since it serves no further
9770 // purpose as the call frame is statically reserved in the prolog. Set
9771 // AdjustsStack as MI is *not* mapped as a frame instruction.
9772 uint32_t NumBytes = MI.getOperand(0).getImm();
9773 if (NumBytes > MFI.getMaxCallFrameSize())
9774 MFI.setMaxCallFrameSize(NumBytes);
9775 MFI.setAdjustsStack(true);
9776
9777 MI.eraseFromParent();
9778 return BB;
9779}
9780
9781// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
 9782 MachineBasicBlock *
 9783 SystemZTargetLowering::emitSelect(MachineInstr &MI,
9784 MachineBasicBlock *MBB) const {
9785 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
9786 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9787
9788 unsigned CCValid = MI.getOperand(3).getImm();
9789 unsigned CCMask = MI.getOperand(4).getImm();
9790
9791 // If we have a sequence of Select* pseudo instructions using the
9792 // same condition code value, we want to expand all of them into
9793 // a single pair of basic blocks using the same condition.
9794 SmallVector<MachineInstr*, 8> Selects;
9795 SmallVector<MachineInstr*, 8> DbgValues;
9796 Selects.push_back(&MI);
9797 unsigned Count = 0;
9798 for (MachineInstr &NextMI : llvm::make_range(
9799 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
9800 if (isSelectPseudo(NextMI)) {
9801 assert(NextMI.getOperand(3).getImm() == CCValid &&
9802 "Bad CCValid operands since CC was not redefined.");
9803 if (NextMI.getOperand(4).getImm() == CCMask ||
9804 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
9805 Selects.push_back(&NextMI);
9806 continue;
9807 }
9808 break;
9809 }
9810 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9811 NextMI.usesCustomInsertionHook())
9812 break;
9813 bool User = false;
9814 for (auto *SelMI : Selects)
9815 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
9816 User = true;
9817 break;
9818 }
9819 if (NextMI.isDebugInstr()) {
9820 if (User) {
9821 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
9822 DbgValues.push_back(&NextMI);
9823 }
9824 } else if (User || ++Count > 20)
9825 break;
9826 }
9827
9828 MachineInstr *LastMI = Selects.back();
9829 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9830 checkCCKill(*LastMI, MBB));
9831 MachineBasicBlock *StartMBB = MBB;
9832 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
9833 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
9834
9835 // Unless CC was killed in the last Select instruction, mark it as
9836 // live-in to both FalseMBB and JoinMBB.
9837 if (!CCKilled) {
9838 FalseMBB->addLiveIn(SystemZ::CC);
9839 JoinMBB->addLiveIn(SystemZ::CC);
9840 }
9841
9842 // StartMBB:
9843 // BRC CCMask, JoinMBB
9844 // # fallthrough to FalseMBB
9845 MBB = StartMBB;
9846 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
9847 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
9848 MBB->addSuccessor(JoinMBB);
9849 MBB->addSuccessor(FalseMBB);
9850
9851 // FalseMBB:
9852 // # fallthrough to JoinMBB
9853 MBB = FalseMBB;
9854 MBB->addSuccessor(JoinMBB);
9855
9856 // JoinMBB:
9857 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
9858 // ...
9859 MBB = JoinMBB;
9860 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
9861 for (auto *SelMI : Selects)
9862 SelMI->eraseFromParent();
9863
9864 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
9865 for (auto *DbgMI : DbgValues)
9866 MBB->splice(InsertPos, StartMBB, DbgMI);
9867
9868 return JoinMBB;
9869}
9870
9871// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
9872// StoreOpcode is the store to use and Invert says whether the store should
9873// happen when the condition is false rather than true. If a STORE ON
9874// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
9875MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
9876 MachineBasicBlock *MBB,
9877 unsigned StoreOpcode,
9878 unsigned STOCOpcode,
9879 bool Invert) const {
9880 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9881
9882 Register SrcReg = MI.getOperand(0).getReg();
9883 MachineOperand Base = MI.getOperand(1);
9884 int64_t Disp = MI.getOperand(2).getImm();
9885 Register IndexReg = MI.getOperand(3).getReg();
9886 unsigned CCValid = MI.getOperand(4).getImm();
9887 unsigned CCMask = MI.getOperand(5).getImm();
9888 DebugLoc DL = MI.getDebugLoc();
9889
9890 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
9891
9892 // ISel pattern matching also adds a load memory operand of the same
9893 // address, so take special care to find the storing memory operand.
9894 MachineMemOperand *MMO = nullptr;
9895 for (auto *I : MI.memoperands())
9896 if (I->isStore()) {
9897 MMO = I;
9898 break;
9899 }
9900
9901 // Use STOCOpcode if possible. We could use different store patterns in
9902 // order to avoid matching the index register, but the performance trade-offs
9903 // might be more complicated in that case.
9904 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
9905 if (Invert)
9906 CCMask ^= CCValid;
9907
9908 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
9909 .addReg(SrcReg)
9910 .add(Base)
9911 .addImm(Disp)
9912 .addImm(CCValid)
9913 .addImm(CCMask)
9914 .addMemOperand(MMO);
9915
9916 MI.eraseFromParent();
9917 return MBB;
9918 }
9919
9920 // Get the condition needed to branch around the store.
9921 if (!Invert)
9922 CCMask ^= CCValid;
9923
9924 MachineBasicBlock *StartMBB = MBB;
9925 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
9926 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
9927
9928 // Unless CC was killed in the CondStore instruction, mark it as
9929 // live-in to both FalseMBB and JoinMBB.
9930 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
9931 !checkCCKill(MI, JoinMBB)) {
9932 FalseMBB->addLiveIn(SystemZ::CC);
9933 JoinMBB->addLiveIn(SystemZ::CC);
9934 }
9935
9936 // StartMBB:
9937 // BRC CCMask, JoinMBB
9938 // # fallthrough to FalseMBB
9939 MBB = StartMBB;
9940 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9941 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
9942 MBB->addSuccessor(JoinMBB);
9943 MBB->addSuccessor(FalseMBB);
9944
9945 // FalseMBB:
9946 // store %SrcReg, %Disp(%Index,%Base)
9947 // # fallthrough to JoinMBB
9948 MBB = FalseMBB;
9949 BuildMI(MBB, DL, TII->get(StoreOpcode))
9950 .addReg(SrcReg)
9951 .add(Base)
9952 .addImm(Disp)
9953 .addReg(IndexReg)
9954 .addMemOperand(MMO);
9955 MBB->addSuccessor(JoinMBB);
9956
9957 MI.eraseFromParent();
9958 return JoinMBB;
9959}
9960
9961// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
9962MachineBasicBlock *
9963SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
9964 MachineBasicBlock *MBB,
9965 bool Unsigned) const {
9966 MachineFunction &MF = *MBB->getParent();
9967 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9968 MachineRegisterInfo &MRI = MF.getRegInfo();
9969
9970 // Synthetic instruction to compare 128-bit values.
9971 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
9972 Register Op0 = MI.getOperand(0).getReg();
9973 Register Op1 = MI.getOperand(1).getReg();
9974
9975 MachineBasicBlock *StartMBB = MBB;
9976 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
9977 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
9978
9979 // StartMBB:
9980 //
9981 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
9982 // Swap the inputs to get:
9983 // CC 1 if high(Op0) > high(Op1)
9984 // CC 2 if high(Op0) < high(Op1)
9985 // CC 0 if high(Op0) == high(Op1)
9986 //
9987 // If CC != 0, we're done, so jump over the next instruction.
9988 //
9989 // VEC[L]G Op1, Op0
9990 // JNE JoinMBB
9991 // # fallthrough to HiEqMBB
9992 MBB = StartMBB;
9993 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
9994 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
9995 .addReg(Op1).addReg(Op0);
9996 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
9997 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
9998 MBB->addSuccessor(JoinMBB);
9999 MBB->addSuccessor(HiEqMBB);
10000
10001 // HiEqMBB:
10002 //
10003 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
10004 // Since we already know the high parts are equal, the CC
10005 // result will only depend on the low parts:
10006 // CC 1 if low(Op0) > low(Op1)
10007 // CC 3 if low(Op0) <= low(Op1)
10008 //
10009 // VCHLGS Tmp, Op0, Op1
10010 // # fallthrough to JoinMBB
10011 MBB = HiEqMBB;
10012 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
10013 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
10014 .addReg(Op0).addReg(Op1);
10015 MBB->addSuccessor(JoinMBB);
10016
10017 // Mark CC as live-in to JoinMBB.
10018 JoinMBB->addLiveIn(SystemZ::CC);
10019
10020 MI.eraseFromParent();
10021 return JoinMBB;
10022}
10023
10024// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
10025// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
10026// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
10027// whether the field should be inverted after performing BinOpcode (e.g. for
10028// NAND).
10029MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
10030 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
10031 bool Invert) const {
10032 MachineFunction &MF = *MBB->getParent();
10033 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10034 MachineRegisterInfo &MRI = MF.getRegInfo();
10035
10036 // Extract the operands. Base can be a register or a frame index.
10037 // Src2 can be a register or immediate.
10038 Register Dest = MI.getOperand(0).getReg();
10039 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10040 int64_t Disp = MI.getOperand(2).getImm();
10041 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
10042 Register BitShift = MI.getOperand(4).getReg();
10043 Register NegBitShift = MI.getOperand(5).getReg();
10044 unsigned BitSize = MI.getOperand(6).getImm();
10045 DebugLoc DL = MI.getDebugLoc();
10046
10047 // Get the right opcodes for the displacement.
10048 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10049 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10050 assert(LOpcode && CSOpcode && "Displacement out of range");
10051
10052 // Create virtual registers for temporary results.
10053 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10054 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10055 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10056 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10057 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10058
10059 // Insert a basic block for the main loop.
10060 MachineBasicBlock *StartMBB = MBB;
10061 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10062 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10063
10064 // StartMBB:
10065 // ...
10066 // %OrigVal = L Disp(%Base)
10067 // # fall through to LoopMBB
10068 MBB = StartMBB;
10069 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10070 MBB->addSuccessor(LoopMBB);
10071
10072 // LoopMBB:
10073 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
10074 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10075 // %RotatedNewVal = OP %RotatedOldVal, %Src2
10076 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10077 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10078 // JNE LoopMBB
10079 // # fall through to DoneMBB
10080 MBB = LoopMBB;
10081 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10082 .addReg(OrigVal).addMBB(StartMBB)
10083 .addReg(Dest).addMBB(LoopMBB);
10084 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10085 .addReg(OldVal).addReg(BitShift).addImm(0);
10086 if (Invert) {
10087 // Perform the operation normally and then invert every bit of the field.
10088 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10089 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
10090 // XILF with the upper BitSize bits set.
10091 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
10092 .addReg(Tmp).addImm(-1U << (32 - BitSize));
10093 } else if (BinOpcode)
10094 // A simple binary operation.
10095 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
10096 .addReg(RotatedOldVal)
10097 .add(Src2);
10098 else
10099 // Use RISBG to rotate Src2 into position and use it to replace the
10100 // field in RotatedOldVal.
10101 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
10102 .addReg(RotatedOldVal).addReg(Src2.getReg())
10103 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
10104 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10105 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10106 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10107 .addReg(OldVal)
10108 .addReg(NewVal)
10109 .add(Base)
10110 .addImm(Disp);
10111 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10112 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
10113 MBB->addSuccessor(LoopMBB);
10114 MBB->addSuccessor(DoneMBB);
10115
10116 MI.eraseFromParent();
10117 return DoneMBB;
10118}
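// Rough C-level sketch of the loop built above (illustrative only, added for
// exposition; names are made up and not part of the original source):
//   Old = *AlignedWord;                        // L, done once in StartMBB
//   do {
//     Rot    = rotl32(Old, BitShift);          // left-align the subword field
//     NewRot = BinOp(Rot, Src2);               // optionally inverted (NAND)
//     New    = rotl32(NewRot, NegBitShift);    // rotate field back into place
//     Old    = CS(AlignedWord, Old, New);      // CS returns the current value
//   } while (the CS failed);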
10119
10120// Implement EmitInstrWithCustomInserter for subword pseudo
10121// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
10122// instruction that should be used to compare the current field with the
10123// minimum or maximum value. KeepOldMask is the BRC condition-code mask
10124// for when the current field should be kept.
10125MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
10126 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
10127 unsigned KeepOldMask) const {
10128 MachineFunction &MF = *MBB->getParent();
10129 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10130 MachineRegisterInfo &MRI = MF.getRegInfo();
10131
10132 // Extract the operands. Base can be a register or a frame index.
10133 Register Dest = MI.getOperand(0).getReg();
10134 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10135 int64_t Disp = MI.getOperand(2).getImm();
10136 Register Src2 = MI.getOperand(3).getReg();
10137 Register BitShift = MI.getOperand(4).getReg();
10138 Register NegBitShift = MI.getOperand(5).getReg();
10139 unsigned BitSize = MI.getOperand(6).getImm();
10140 DebugLoc DL = MI.getDebugLoc();
10141
10142 // Get the right opcodes for the displacement.
10143 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10144 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10145 assert(LOpcode && CSOpcode && "Displacement out of range");
10146
10147 // Create virtual registers for temporary results.
10148 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10149 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10150 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10151 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10152 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10153 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10154
10155 // Insert 3 basic blocks for the loop.
10156 MachineBasicBlock *StartMBB = MBB;
10157 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10158 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10159 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
10160 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
10161
10162 // StartMBB:
10163 // ...
10164 // %OrigVal = L Disp(%Base)
10165 // # fall through to LoopMBB
10166 MBB = StartMBB;
10167 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10168 MBB->addSuccessor(LoopMBB);
10169
10170 // LoopMBB:
10171 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
10172 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10173 // CompareOpcode %RotatedOldVal, %Src2
10174 // BRC KeepOldMask, UpdateMBB
10175 MBB = LoopMBB;
10176 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10177 .addReg(OrigVal).addMBB(StartMBB)
10178 .addReg(Dest).addMBB(UpdateMBB);
10179 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10180 .addReg(OldVal).addReg(BitShift).addImm(0);
10181 BuildMI(MBB, DL, TII->get(CompareOpcode))
10182 .addReg(RotatedOldVal).addReg(Src2);
10183 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10184 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
10185 MBB->addSuccessor(UpdateMBB);
10186 MBB->addSuccessor(UseAltMBB);
10187
10188 // UseAltMBB:
10189 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
10190 // # fall through to UpdateMBB
10191 MBB = UseAltMBB;
10192 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
10193 .addReg(RotatedOldVal).addReg(Src2)
10194 .addImm(32).addImm(31 + BitSize).addImm(0);
10195 MBB->addSuccessor(UpdateMBB);
10196
10197 // UpdateMBB:
10198 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
10199 // [ %RotatedAltVal, UseAltMBB ]
10200 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10201 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10202 // JNE LoopMBB
10203 // # fall through to DoneMBB
10204 MBB = UpdateMBB;
10205 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
10206 .addReg(RotatedOldVal).addMBB(LoopMBB)
10207 .addReg(RotatedAltVal).addMBB(UseAltMBB);
10208 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10209 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10210 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10211 .addReg(OldVal)
10212 .addReg(NewVal)
10213 .add(Base)
10214 .addImm(Disp);
10215 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10216 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
10217 MBB->addSuccessor(LoopMBB);
10218 MBB->addSuccessor(DoneMBB);
10219
10220 MI.eraseFromParent();
10221 return DoneMBB;
10222}
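// Illustrative sketch of the min/max loop above (not in the original source):
//   Old = *AlignedWord;
//   do {
//     Rot = rotl32(Old, BitShift);             // left-align the subword field
//     Alt = RISBG(Rot, Src2);                  // splice Src2 into the field
//     New = compare says keep old ? Rot : Alt; // BRC on KeepOldMask
//     Old = CS(AlignedWord, Old, rotl32(New, NegBitShift));
//   } while (the CS failed);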
10223
10224// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
10225// instruction MI.
10227SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
10228 MachineBasicBlock *MBB) const {
10229 MachineFunction &MF = *MBB->getParent();
10230 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10231 MachineRegisterInfo &MRI = MF.getRegInfo();
10232
10233 // Extract the operands. Base can be a register or a frame index.
10234 Register Dest = MI.getOperand(0).getReg();
10235 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10236 int64_t Disp = MI.getOperand(2).getImm();
10237 Register CmpVal = MI.getOperand(3).getReg();
10238 Register OrigSwapVal = MI.getOperand(4).getReg();
10239 Register BitShift = MI.getOperand(5).getReg();
10240 Register NegBitShift = MI.getOperand(6).getReg();
10241 int64_t BitSize = MI.getOperand(7).getImm();
10242 DebugLoc DL = MI.getDebugLoc();
10243
10244 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
10245
10246 // Get the right opcodes for the displacement and zero-extension.
10247 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10248 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10249 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
10250 assert(LOpcode && CSOpcode && "Displacement out of range");
10251
10252 // Create virtual registers for temporary results.
10253 Register OrigOldVal = MRI.createVirtualRegister(RC);
10254 Register OldVal = MRI.createVirtualRegister(RC);
10255 Register SwapVal = MRI.createVirtualRegister(RC);
10256 Register StoreVal = MRI.createVirtualRegister(RC);
10257 Register OldValRot = MRI.createVirtualRegister(RC);
10258 Register RetryOldVal = MRI.createVirtualRegister(RC);
10259 Register RetrySwapVal = MRI.createVirtualRegister(RC);
10260
10261 // Insert 2 basic blocks for the loop.
10262 MachineBasicBlock *StartMBB = MBB;
10263 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10264 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10265 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
10266
10267 // StartMBB:
10268 // ...
10269 // %OrigOldVal = L Disp(%Base)
10270 // # fall through to LoopMBB
10271 MBB = StartMBB;
10272 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
10273 .add(Base)
10274 .addImm(Disp)
10275 .addReg(0);
10276 MBB->addSuccessor(LoopMBB);
10277
10278 // LoopMBB:
10279 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
10280 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
10281 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
10282 // ^^ The low BitSize bits contain the field
10283 // of interest.
10284 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
10285 // ^^ Replace the upper 32-BitSize bits of the
10286 // swap value with those that we loaded and rotated.
10287 // %Dest = LL[CH] %OldValRot
10288 // CR %Dest, %CmpVal
10289 // JNE DoneMBB
10290 // # Fall through to SetMBB
10291 MBB = LoopMBB;
10292 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10293 .addReg(OrigOldVal).addMBB(StartMBB)
10294 .addReg(RetryOldVal).addMBB(SetMBB);
10295 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
10296 .addReg(OrigSwapVal).addMBB(StartMBB)
10297 .addReg(RetrySwapVal).addMBB(SetMBB);
10298 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
10299 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
10300 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
10301 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
10302 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
10303 .addReg(OldValRot);
10304 BuildMI(MBB, DL, TII->get(SystemZ::CR))
10305 .addReg(Dest).addReg(CmpVal);
10306 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10307 .addImm(SystemZ::CCMASK_ICMP)
10308 .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
10309 MBB->addSuccessor(DoneMBB);
10310 MBB->addSuccessor(SetMBB);
10311
10312 // SetMBB:
10313 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
10314 // ^^ Rotate the new field to its proper position.
10315 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
10316 // JNE LoopMBB
10317 // # fall through to ExitMBB
10318 MBB = SetMBB;
10319 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
10320 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
10321 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
10322 .addReg(OldVal)
10323 .addReg(StoreVal)
10324 .add(Base)
10325 .addImm(Disp);
10326 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10327 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
10328 MBB->addSuccessor(LoopMBB);
10329 MBB->addSuccessor(DoneMBB);
10330
10331 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
10332 // to the block after the loop. At this point, CC may have been defined
10333 // either by the CR in LoopMBB or by the CS in SetMBB.
10334 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
10335 DoneMBB->addLiveIn(SystemZ::CC);
10336
10337 MI.eraseFromParent();
10338 return DoneMBB;
10339}
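// Illustrative sketch of the compare-and-swap loop above (not in the original
// source; names are made up):
//   Old = *AlignedWord;
//   for (;;) {
//     Rot  = rotl32(Old, BitShift + BitSize);  // field in the low BitSize bits
//     Dest = zext_subword(Rot);                // LLCR / LLHR
//     if (Dest != CmpVal) break;               // comparison failed
//     Swap = splice(SwapVal, Rot);             // keep the non-field bits of Rot
//     Prev = CS(AlignedWord, Old, rotate Swap back into place);
//     if (Prev == Old) break;                  // swap succeeded
//     Old = Prev;                              // retry with the new value
//   }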
10340
10341// Emit a move from two GR64s to a GR128.
10342MachineBasicBlock *
10343SystemZTargetLowering::emitPair128(MachineInstr &MI,
10344 MachineBasicBlock *MBB) const {
10345 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10346 const DebugLoc &DL = MI.getDebugLoc();
10347
10348 Register Dest = MI.getOperand(0).getReg();
10349 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
10350 .add(MI.getOperand(1))
10351 .addImm(SystemZ::subreg_h64)
10352 .add(MI.getOperand(2))
10353 .addImm(SystemZ::subreg_l64);
10354 MI.eraseFromParent();
10355 return MBB;
10356}
10357
10358// Emit an extension from a GR64 to a GR128. ClearEven is true
10359// if the high register of the GR128 value must be cleared or false if
10360// it's "don't care".
10361MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
10362 MachineBasicBlock *MBB,
10363 bool ClearEven) const {
10364 MachineFunction &MF = *MBB->getParent();
10365 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10366 MachineRegisterInfo &MRI = MF.getRegInfo();
10367 DebugLoc DL = MI.getDebugLoc();
10368
10369 Register Dest = MI.getOperand(0).getReg();
10370 Register Src = MI.getOperand(1).getReg();
10371 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10372
10373 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
10374 if (ClearEven) {
10375 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10376 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10377
10378 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
10379 .addImm(0);
10380 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
10381 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
10382 In128 = NewIn128;
10383 }
10384 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
10385 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
10386
10387 MI.eraseFromParent();
10388 return MBB;
10389}
10390
10391MachineBasicBlock *
10392SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
10393 MachineBasicBlock *MBB,
10394 unsigned Opcode, bool IsMemset) const {
10395 MachineFunction &MF = *MBB->getParent();
10396 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10397 MachineRegisterInfo &MRI = MF.getRegInfo();
10398 DebugLoc DL = MI.getDebugLoc();
10399
10400 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
10401 uint64_t DestDisp = MI.getOperand(1).getImm();
10402 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
10403 uint64_t SrcDisp;
10404
10405 // Fold the displacement Disp if it is out of range.
10406 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
10407 if (!isUInt<12>(Disp)) {
10408 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10409 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
10410 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
10411 .add(Base).addImm(Disp).addReg(0);
10412 Base = MachineOperand::CreateReg(Reg, false);
10413 Disp = 0;
10414 }
10415 };
10416
10417 if (!IsMemset) {
10418 SrcBase = earlyUseOperand(MI.getOperand(2));
10419 SrcDisp = MI.getOperand(3).getImm();
10420 } else {
10421 SrcBase = DestBase;
10422 SrcDisp = DestDisp++;
10423 foldDisplIfNeeded(DestBase, DestDisp);
10424 }
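 // Note (explanatory, not in the original source): for memset the source is
 // the destination shifted back by one byte, so each MVC of length N copies
 // byte DestDisp-1 forward through the next N bytes, propagating the fill
 // byte that insertMemMemOp stores first with MVI/STC.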
10425
10426 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
10427 bool IsImmForm = LengthMO.isImm();
10428 bool IsRegForm = !IsImmForm;
10429
10430 // Build and insert one Opcode of Length, with special treatment for memset.
10431 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
10432 MachineBasicBlock::iterator InsPos,
10433 MachineOperand DBase, uint64_t DDisp,
10434 MachineOperand SBase, uint64_t SDisp,
10435 unsigned Length) -> void {
10436 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
10437 if (IsMemset) {
10438 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
10439 if (ByteMO.isImm())
10440 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
10441 .add(SBase).addImm(SDisp).add(ByteMO);
10442 else
10443 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
10444 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
10445 if (--Length == 0)
10446 return;
10447 }
10448 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
10449 .add(DBase).addImm(DDisp).addImm(Length)
10450 .add(SBase).addImm(SDisp)
10451 .setMemRefs(MI.memoperands());
10452 };
10453
10454 bool NeedsLoop = false;
10455 uint64_t ImmLength = 0;
10456 Register LenAdjReg = SystemZ::NoRegister;
10457 if (IsImmForm) {
10458 ImmLength = LengthMO.getImm();
10459 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
10460 if (ImmLength == 0) {
10461 MI.eraseFromParent();
10462 return MBB;
10463 }
10464 if (Opcode == SystemZ::CLC) {
10465 if (ImmLength > 3 * 256)
10466 // A two-CLC sequence is a clear win over a loop, not least because
10467 // it needs only one branch. A three-CLC sequence needs the same
10468 // number of branches as a loop (i.e. 2), but is shorter. That
10469 // brings us to lengths greater than 768 bytes. It seems relatively
10470 // likely that a difference will be found within the first 768 bytes,
10471 // so we just optimize for the smallest number of branch
10472 // instructions, in order to avoid polluting the prediction buffer
10473 // too much.
10474 NeedsLoop = true;
10475 } else if (ImmLength > 6 * 256)
10476 // The heuristic we use is to prefer loops for anything that would
10477 // require 7 or more MVCs. With these kinds of sizes there isn't much
10478 // to choose between straight-line code and looping code, since the
10479 // time will be dominated by the MVCs themselves.
10480 NeedsLoop = true;
10481 } else {
10482 NeedsLoop = true;
10483 LenAdjReg = LengthMO.getReg();
10484 }
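 // Worked example of the heuristic above (illustrative, not in the source):
 // an immediate MVC of 1500 bytes (6 MVCs) stays straight-line, while 1600
 // bytes (> 6 * 256) takes the loop; a CLC of 600 bytes is emitted as
 // straight-line CLCs, while 1000 bytes (> 3 * 256) uses the loop.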
10485
10486 // When generating more than one CLC, all but the last will need to
10487 // branch to the end when a difference is found.
10488 MachineBasicBlock *EndMBB =
10489 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
10490 ? SystemZ::splitBlockAfter(MI, MBB)
10491 : nullptr);
10492
10493 if (NeedsLoop) {
10494 Register StartCountReg =
10495 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10496 if (IsImmForm) {
10497 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
10498 ImmLength &= 255;
10499 } else {
10500 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
10501 .addReg(LenAdjReg)
10502 .addReg(0)
10503 .addImm(8);
10504 }
10505
10506 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
10507 auto loadZeroAddress = [&]() -> MachineOperand {
10508 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10509 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
10510 return MachineOperand::CreateReg(Reg, false);
10511 };
10512 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
10513 DestBase = loadZeroAddress();
10514 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
10515 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
10516
10517 MachineBasicBlock *StartMBB = nullptr;
10518 MachineBasicBlock *LoopMBB = nullptr;
10519 MachineBasicBlock *NextMBB = nullptr;
10520 MachineBasicBlock *DoneMBB = nullptr;
10521 MachineBasicBlock *AllDoneMBB = nullptr;
10522
10523 Register StartSrcReg = forceReg(MI, SrcBase, TII);
10524 Register StartDestReg =
10525 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
10526
10527 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
10528 Register ThisSrcReg = MRI.createVirtualRegister(RC);
10529 Register ThisDestReg =
10530 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
10531 Register NextSrcReg = MRI.createVirtualRegister(RC);
10532 Register NextDestReg =
10533 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
10534 RC = &SystemZ::GR64BitRegClass;
10535 Register ThisCountReg = MRI.createVirtualRegister(RC);
10536 Register NextCountReg = MRI.createVirtualRegister(RC);
10537
10538 if (IsRegForm) {
10539 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10540 StartMBB = SystemZ::emitBlockAfter(MBB);
10541 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10542 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10543 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
10544
10545 // MBB:
10546 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
10547 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10548 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
10549 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10550 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10551 .addMBB(AllDoneMBB);
10552 MBB->addSuccessor(AllDoneMBB);
10553 if (!IsMemset)
10554 MBB->addSuccessor(StartMBB);
10555 else {
10556 // MemsetOneCheckMBB:
10557 // # Jump to MemsetOneMBB for a memset of length 1, or
10558 // # fall thru to StartMBB.
10559 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
10560 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
10561 MBB->addSuccessor(MemsetOneCheckMBB);
10562 MBB = MemsetOneCheckMBB;
10563 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10564 .addReg(LenAdjReg).addImm(-1);
10565 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10566 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10567 .addMBB(MemsetOneMBB);
10568 MBB->addSuccessor(MemsetOneMBB, {10, 100});
10569 MBB->addSuccessor(StartMBB, {90, 100});
10570
10571 // MemsetOneMBB:
10572 // # Jump back to AllDoneMBB after a single MVI or STC.
10573 MBB = MemsetOneMBB;
10574 insertMemMemOp(MBB, MBB->end(),
10575 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
10576 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
10577 1);
10578 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
10579 MBB->addSuccessor(AllDoneMBB);
10580 }
10581
10582 // StartMBB:
10583 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
10584 MBB = StartMBB;
10585 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10586 .addReg(StartCountReg).addImm(0);
10587 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10588 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10589 .addMBB(DoneMBB);
10590 MBB->addSuccessor(DoneMBB);
10591 MBB->addSuccessor(LoopMBB);
10592 }
10593 else {
10594 StartMBB = MBB;
10595 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10596 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10597 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10598
10599 // StartMBB:
10600 // # fall through to LoopMBB
10601 MBB->addSuccessor(LoopMBB);
10602
10603 DestBase = MachineOperand::CreateReg(NextDestReg, false);
10604 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
10605 if (EndMBB && !ImmLength)
10606 // If the loop handled the whole CLC range, DoneMBB will be empty with
10607 // CC live-through into EndMBB, so add it as live-in.
10608 DoneMBB->addLiveIn(SystemZ::CC);
10609 }
10610
10611 // LoopMBB:
10612 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
10613 // [ %NextDestReg, NextMBB ]
10614 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
10615 // [ %NextSrcReg, NextMBB ]
10616 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
10617 // [ %NextCountReg, NextMBB ]
10618 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
10619 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
10620 // ( JLH EndMBB )
10621 //
10622 // The prefetch is used only for MVC. The JLH is used only for CLC.
10623 MBB = LoopMBB;
10624 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
10625 .addReg(StartDestReg).addMBB(StartMBB)
10626 .addReg(NextDestReg).addMBB(NextMBB);
10627 if (!HaveSingleBase)
10628 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
10629 .addReg(StartSrcReg).addMBB(StartMBB)
10630 .addReg(NextSrcReg).addMBB(NextMBB);
10631 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
10632 .addReg(StartCountReg).addMBB(StartMBB)
10633 .addReg(NextCountReg).addMBB(NextMBB);
10634 if (Opcode == SystemZ::MVC)
10635 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
10636 .addImm(SystemZ::PFD_WRITE)
10637 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
10638 insertMemMemOp(MBB, MBB->end(),
10639 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
10640 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
10641 if (EndMBB) {
10642 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10643 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10644 .addMBB(EndMBB);
10645 MBB->addSuccessor(EndMBB);
10646 MBB->addSuccessor(NextMBB);
10647 }
10648
10649 // NextMBB:
10650 // %NextDestReg = LA 256(%ThisDestReg)
10651 // %NextSrcReg = LA 256(%ThisSrcReg)
10652 // %NextCountReg = AGHI %ThisCountReg, -1
10653 // CGHI %NextCountReg, 0
10654 // JLH LoopMBB
10655 // # fall through to DoneMBB
10656 //
10657 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
10658 MBB = NextMBB;
10659 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
10660 .addReg(ThisDestReg).addImm(256).addReg(0);
10661 if (!HaveSingleBase)
10662 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
10663 .addReg(ThisSrcReg).addImm(256).addReg(0);
10664 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
10665 .addReg(ThisCountReg).addImm(-1);
10666 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10667 .addReg(NextCountReg).addImm(0);
10668 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10669 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10670 .addMBB(LoopMBB);
10671 MBB->addSuccessor(LoopMBB);
10672 MBB->addSuccessor(DoneMBB);
10673
10674 MBB = DoneMBB;
10675 if (IsRegForm) {
10676 // DoneMBB:
10677 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
10678 // # Use EXecute Relative Long for the remainder of the bytes. The target
10679 // instruction of the EXRL will have a length field of 1 since 0 is an
10680 // illegal value. The number of bytes processed becomes (%LenAdjReg &
10681 // 0xff) + 1.
10682 // # Fall through to AllDoneMBB.
10683 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10684 Register RemDestReg = HaveSingleBase ? RemSrcReg
10685 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10686 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
10687 .addReg(StartDestReg).addMBB(StartMBB)
10688 .addReg(NextDestReg).addMBB(NextMBB);
10689 if (!HaveSingleBase)
10690 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
10691 .addReg(StartSrcReg).addMBB(StartMBB)
10692 .addReg(NextSrcReg).addMBB(NextMBB);
10693 if (IsMemset)
10694 insertMemMemOp(MBB, MBB->end(),
10695 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
10696 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
10697 MachineInstrBuilder EXRL_MIB =
10698 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
10699 .addImm(Opcode)
10700 .addReg(LenAdjReg)
10701 .addReg(RemDestReg).addImm(DestDisp)
10702 .addReg(RemSrcReg).addImm(SrcDisp);
10703 MBB->addSuccessor(AllDoneMBB);
10704 MBB = AllDoneMBB;
10705 if (Opcode != SystemZ::MVC) {
10706 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
10707 if (EndMBB)
10708 MBB->addLiveIn(SystemZ::CC);
10709 }
10710 }
10711 MF.getProperties().resetNoPHIs();
10712 }
10713
10714 // Handle any remaining bytes with straight-line code.
10715 while (ImmLength > 0) {
10716 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
10717 // The previous iteration might have created out-of-range displacements.
10718 // Apply them using LA/LAY if so.
10719 foldDisplIfNeeded(DestBase, DestDisp);
10720 foldDisplIfNeeded(SrcBase, SrcDisp);
10721 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
10722 DestDisp += ThisLength;
10723 SrcDisp += ThisLength;
10724 ImmLength -= ThisLength;
10725 // If there's another CLC to go, branch to the end if a difference
10726 // was found.
10727 if (EndMBB && ImmLength > 0) {
10728 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
10729 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10730 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10731 .addMBB(EndMBB);
10732 MBB->addSuccessor(EndMBB);
10733 MBB->addSuccessor(NextMBB);
10734 MBB = NextMBB;
10735 }
10736 }
10737 if (EndMBB) {
10738 MBB->addSuccessor(EndMBB);
10739 MBB = EndMBB;
10740 MBB->addLiveIn(SystemZ::CC);
10741 }
10742
10743 MI.eraseFromParent();
10744 return MBB;
10745}
10746
10747// Decompose string pseudo-instruction MI into a loop that continually performs
10748// Opcode until CC != 3.
10749MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
10750 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10751 MachineFunction &MF = *MBB->getParent();
10752 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10753 MachineRegisterInfo &MRI = MF.getRegInfo();
10754 DebugLoc DL = MI.getDebugLoc();
10755
10756 uint64_t End1Reg = MI.getOperand(0).getReg();
10757 uint64_t Start1Reg = MI.getOperand(1).getReg();
10758 uint64_t Start2Reg = MI.getOperand(2).getReg();
10759 uint64_t CharReg = MI.getOperand(3).getReg();
10760
10761 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
10762 uint64_t This1Reg = MRI.createVirtualRegister(RC);
10763 uint64_t This2Reg = MRI.createVirtualRegister(RC);
10764 uint64_t End2Reg = MRI.createVirtualRegister(RC);
10765
10766 MachineBasicBlock *StartMBB = MBB;
10767 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10768 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10769
10770 // StartMBB:
10771 // # fall through to LoopMBB
10772 MBB->addSuccessor(LoopMBB);
10773
10774 // LoopMBB:
10775 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
10776 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
10777 // R0L = %CharReg
10778 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
10779 // JO LoopMBB
10780 // # fall through to DoneMBB
10781 //
10782 // The load of R0L can be hoisted by post-RA LICM.
10783 MBB = LoopMBB;
10784
10785 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
10786 .addReg(Start1Reg).addMBB(StartMBB)
10787 .addReg(End1Reg).addMBB(LoopMBB);
10788 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
10789 .addReg(Start2Reg).addMBB(StartMBB)
10790 .addReg(End2Reg).addMBB(LoopMBB);
10791 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
10792 BuildMI(MBB, DL, TII->get(Opcode))
10793 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
10794 .addReg(This1Reg).addReg(This2Reg);
10795 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10796 .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
10797 MBB->addSuccessor(LoopMBB);
10798 MBB->addSuccessor(DoneMBB);
10799
10800 DoneMBB->addLiveIn(SystemZ::CC);
10801
10802 MI.eraseFromParent();
10803 return DoneMBB;
10804}
10805
10806// Update TBEGIN instruction with final opcode and register clobbers.
10807MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
10808 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
10809 bool NoFloat) const {
10810 MachineFunction &MF = *MBB->getParent();
10811 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
10812 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10813
10814 // Update opcode.
10815 MI.setDesc(TII->get(Opcode));
10816
10817 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
10818 // Make sure to add the corresponding GRSM bits if they are missing.
10819 uint64_t Control = MI.getOperand(2).getImm();
10820 static const unsigned GPRControlBit[16] = {
10821 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
10822 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
10823 };
10824 Control |= GPRControlBit[15];
10825 if (TFI->hasFP(MF))
10826 Control |= GPRControlBit[11];
10827 MI.getOperand(2).setImm(Control);
10828
10829 // Add GPR clobbers.
10830 for (int I = 0; I < 16; I++) {
10831 if ((Control & GPRControlBit[I]) == 0) {
10832 unsigned Reg = SystemZMC::GR64Regs[I];
10833 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10834 }
10835 }
10836
10837 // Add FPR/VR clobbers.
10838 if (!NoFloat && (Control & 4) != 0) {
10839 if (Subtarget.hasVector()) {
10840 for (unsigned Reg : SystemZMC::VR128Regs) {
10841 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10842 }
10843 } else {
10844 for (unsigned Reg : SystemZMC::FP64Regs) {
10845 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10846 }
10847 }
10848 }
10849
10850 return MBB;
10851}
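// Explanatory note (not in the original source): GPRControlBit maps each GPR
// pair to its bit in the TBEGIN general-register-save-mask, e.g. 0x2000 for
// %r4/%r5. Pairs whose GRSM bit is clear are not restored on abort, so the
// loop above models them as implicit defs (clobbers) of the instruction.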
10852
10853MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
10854 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10855 MachineFunction &MF = *MBB->getParent();
10856 MachineRegisterInfo *MRI = &MF.getRegInfo();
10857 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10858 DebugLoc DL = MI.getDebugLoc();
10859
10860 Register SrcReg = MI.getOperand(0).getReg();
10861
10862 // Create new virtual register of the same class as source.
10863 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
10864 Register DstReg = MRI->createVirtualRegister(RC);
10865
10866 // Replace pseudo with a normal load-and-test that models the def as
10867 // well.
10868 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
10869 .addReg(SrcReg)
10870 .setMIFlags(MI.getFlags());
10871 MI.eraseFromParent();
10872
10873 return MBB;
10874}
10875
10876MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
10877 MachineInstr &MI, MachineBasicBlock *MBB) const {
10878 MachineFunction &MF = *MBB->getParent();
10879 MachineRegisterInfo *MRI = &MF.getRegInfo();
10880 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10881 DebugLoc DL = MI.getDebugLoc();
10882 const unsigned ProbeSize = getStackProbeSize(MF);
10883 Register DstReg = MI.getOperand(0).getReg();
10884 Register SizeReg = MI.getOperand(2).getReg();
10885
10886 MachineBasicBlock *StartMBB = MBB;
10887 MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
10888 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
10889 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
10890 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
10891 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
10892
10893 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
10894 MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
10895
10896 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10897 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10898
10899 // LoopTestMBB
10900 // BRC TailTestMBB
10901 // # fallthrough to LoopBodyMBB
10902 StartMBB->addSuccessor(LoopTestMBB);
10903 MBB = LoopTestMBB;
10904 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
10905 .addReg(SizeReg)
10906 .addMBB(StartMBB)
10907 .addReg(IncReg)
10908 .addMBB(LoopBodyMBB);
10909 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
10910 .addReg(PHIReg)
10911 .addImm(ProbeSize);
10912 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10913 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
10914 .addMBB(TailTestMBB);
10915 MBB->addSuccessor(LoopBodyMBB);
10916 MBB->addSuccessor(TailTestMBB);
10917
10918 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
10919 // J LoopTestMBB
10920 MBB = LoopBodyMBB;
10921 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
10922 .addReg(PHIReg)
10923 .addImm(ProbeSize);
10924 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
10925 .addReg(SystemZ::R15D)
10926 .addImm(ProbeSize);
10927 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
10928 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
10929 .setMemRefs(VolLdMMO);
10930 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
10931 MBB->addSuccessor(LoopTestMBB);
10932
10933 // TailTestMBB
10934 // BRC DoneMBB
10935 // # fallthrough to TailMBB
10936 MBB = TailTestMBB;
10937 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10938 .addReg(PHIReg)
10939 .addImm(0);
10940 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10941 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10942 .addMBB(DoneMBB);
10943 MBB->addSuccessor(TailMBB);
10944 MBB->addSuccessor(DoneMBB);
10945
10946 // TailMBB
10947 // # fallthrough to DoneMBB
10948 MBB = TailMBB;
10949 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
10950 .addReg(SystemZ::R15D)
10951 .addReg(PHIReg);
10952 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
10953 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
10954 .setMemRefs(VolLdMMO);
10955 MBB->addSuccessor(DoneMBB);
10956
10957 // DoneMBB
10958 MBB = DoneMBB;
10959 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
10960 .addReg(SystemZ::R15D);
10961
10962 MI.eraseFromParent();
10963 return DoneMBB;
10964}
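// Explanatory sketch (not in the original source): the blocks above decrement
// %r15 in ProbeSize steps, touching the newly exposed stack with a volatile
// compare after each step, then handle the sub-ProbeSize remainder the same
// way before copying the final %r15 into the result register.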
10965
10966SDValue SystemZTargetLowering::
10967getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
10968 MachineFunction &MF = DAG.getMachineFunction();
10969 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
10970 SDLoc DL(SP);
10971 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
10972 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
10973}
10974
10975MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
10976 MachineInstr &MI, MachineBasicBlock *MBB) const {
10977 switch (MI.getOpcode()) {
10978 case SystemZ::ADJCALLSTACKDOWN:
10979 case SystemZ::ADJCALLSTACKUP:
10980 return emitAdjCallStack(MI, MBB);
10981
10982 case SystemZ::Select32:
10983 case SystemZ::Select64:
10984 case SystemZ::Select128:
10985 case SystemZ::SelectF32:
10986 case SystemZ::SelectF64:
10987 case SystemZ::SelectF128:
10988 case SystemZ::SelectVR32:
10989 case SystemZ::SelectVR64:
10990 case SystemZ::SelectVR128:
10991 return emitSelect(MI, MBB);
10992
10993 case SystemZ::CondStore8Mux:
10994 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
10995 case SystemZ::CondStore8MuxInv:
10996 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
10997 case SystemZ::CondStore16Mux:
10998 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
10999 case SystemZ::CondStore16MuxInv:
11000 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
11001 case SystemZ::CondStore32Mux:
11002 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
11003 case SystemZ::CondStore32MuxInv:
11004 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
11005 case SystemZ::CondStore8:
11006 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
11007 case SystemZ::CondStore8Inv:
11008 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
11009 case SystemZ::CondStore16:
11010 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
11011 case SystemZ::CondStore16Inv:
11012 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
11013 case SystemZ::CondStore32:
11014 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
11015 case SystemZ::CondStore32Inv:
11016 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
11017 case SystemZ::CondStore64:
11018 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
11019 case SystemZ::CondStore64Inv:
11020 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
11021 case SystemZ::CondStoreF32:
11022 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
11023 case SystemZ::CondStoreF32Inv:
11024 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
11025 case SystemZ::CondStoreF64:
11026 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
11027 case SystemZ::CondStoreF64Inv:
11028 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
11029
11030 case SystemZ::SCmp128Hi:
11031 return emitICmp128Hi(MI, MBB, false);
11032 case SystemZ::UCmp128Hi:
11033 return emitICmp128Hi(MI, MBB, true);
11034
11035 case SystemZ::PAIR128:
11036 return emitPair128(MI, MBB);
11037 case SystemZ::AEXT128:
11038 return emitExt128(MI, MBB, false);
11039 case SystemZ::ZEXT128:
11040 return emitExt128(MI, MBB, true);
11041
11042 case SystemZ::ATOMIC_SWAPW:
11043 return emitAtomicLoadBinary(MI, MBB, 0);
11044
11045 case SystemZ::ATOMIC_LOADW_AR:
11046 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
11047 case SystemZ::ATOMIC_LOADW_AFI:
11048 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
11049
11050 case SystemZ::ATOMIC_LOADW_SR:
11051 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
11052
11053 case SystemZ::ATOMIC_LOADW_NR:
11054 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
11055 case SystemZ::ATOMIC_LOADW_NILH:
11056 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
11057
11058 case SystemZ::ATOMIC_LOADW_OR:
11059 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
11060 case SystemZ::ATOMIC_LOADW_OILH:
11061 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
11062
11063 case SystemZ::ATOMIC_LOADW_XR:
11064 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
11065 case SystemZ::ATOMIC_LOADW_XILF:
11066 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
11067
11068 case SystemZ::ATOMIC_LOADW_NRi:
11069 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
11070 case SystemZ::ATOMIC_LOADW_NILHi:
11071 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
11072
11073 case SystemZ::ATOMIC_LOADW_MIN:
11074 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
11075 case SystemZ::ATOMIC_LOADW_MAX:
11076 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
11077 case SystemZ::ATOMIC_LOADW_UMIN:
11078 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
11079 case SystemZ::ATOMIC_LOADW_UMAX:
11080 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
11081
11082 case SystemZ::ATOMIC_CMP_SWAPW:
11083 return emitAtomicCmpSwapW(MI, MBB);
11084 case SystemZ::MVCImm:
11085 case SystemZ::MVCReg:
11086 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
11087 case SystemZ::NCImm:
11088 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
11089 case SystemZ::OCImm:
11090 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
11091 case SystemZ::XCImm:
11092 case SystemZ::XCReg:
11093 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
11094 case SystemZ::CLCImm:
11095 case SystemZ::CLCReg:
11096 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
11097 case SystemZ::MemsetImmImm:
11098 case SystemZ::MemsetImmReg:
11099 case SystemZ::MemsetRegImm:
11100 case SystemZ::MemsetRegReg:
11101 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
11102 case SystemZ::CLSTLoop:
11103 return emitStringWrapper(MI, MBB, SystemZ::CLST);
11104 case SystemZ::MVSTLoop:
11105 return emitStringWrapper(MI, MBB, SystemZ::MVST);
11106 case SystemZ::SRSTLoop:
11107 return emitStringWrapper(MI, MBB, SystemZ::SRST);
11108 case SystemZ::TBEGIN:
11109 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
11110 case SystemZ::TBEGIN_nofloat:
11111 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
11112 case SystemZ::TBEGINC:
11113 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
11114 case SystemZ::LTEBRCompare_Pseudo:
11115 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
11116 case SystemZ::LTDBRCompare_Pseudo:
11117 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
11118 case SystemZ::LTXBRCompare_Pseudo:
11119 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
11120
11121 case SystemZ::PROBED_ALLOCA:
11122 return emitProbedAlloca(MI, MBB);
11123 case SystemZ::EH_SjLj_SetJmp:
11124 return emitEHSjLjSetJmp(MI, MBB);
11125 case SystemZ::EH_SjLj_LongJmp:
11126 return emitEHSjLjLongJmp(MI, MBB);
11127
11128 case TargetOpcode::STACKMAP:
11129 case TargetOpcode::PATCHPOINT:
11130 return emitPatchPoint(MI, MBB);
11131
11132 default:
11133 llvm_unreachable("Unexpected instr type to insert");
11134 }
11135}
11136
11137// This is only used by the isel schedulers, and is needed only to prevent
11138// the compiler from crashing when list-ilp is used.
11139const TargetRegisterClass *
11140SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
11141 if (VT == MVT::Untyped)
11142 return &SystemZ::ADDR128BitRegClass;
11143 return TargetLowering::getRepRegClassFor(VT);
11144}
11145
11146SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
11147 SelectionDAG &DAG) const {
11148 SDLoc dl(Op);
11149 /*
11150 The rounding method is in FPC Byte 3 bits 6-7, and has the following
11151 settings:
11152 00 Round to nearest
11153 01 Round to 0
11154 10 Round to +inf
11155 11 Round to -inf
11156
11157 FLT_ROUNDS, on the other hand, expects the following:
11158 -1 Undefined
11159 0 Round to 0
11160 1 Round to nearest
11161 2 Round to +inf
11162 3 Round to -inf
11163 */
11164
11165 // Save FPC to register.
11166 SDValue Chain = Op.getOperand(0);
11167 SDValue EFPC(
11168 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
11169 Chain = EFPC.getValue(1);
11170
11171 // Transform as necessary
11172 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
11173 DAG.getConstant(3, dl, MVT::i32));
11174 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
11175 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
11176 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
11177 DAG.getConstant(1, dl, MVT::i32)));
11178
11179 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
11180 DAG.getConstant(1, dl, MVT::i32));
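 // Spot-check of the transform (explanatory, not in the original source):
 // FPC 00 -> 1 (nearest), 01 -> 0 (toward zero), 10 -> 2 (+inf), 11 -> 3
 // (-inf), matching the FLT_ROUNDS encoding listed above.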
11181 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
11182
11183 return DAG.getMergeValues({RetVal, Chain}, dl);
11184}
11185
11186SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
11187 SelectionDAG &DAG) const {
11188 EVT VT = Op.getValueType();
11189 Op = Op.getOperand(0);
11190 EVT OpVT = Op.getValueType();
11191
11192 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
11193
11194 SDLoc DL(Op);
11195
11196 // load a 0 vector for the third operand of VSUM.
11197 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
11198
11199 // execute VSUM.
11200 switch (OpVT.getScalarSizeInBits()) {
11201 case 8:
11202 case 16:
11203 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
11204 [[fallthrough]];
11205 case 32:
11206 case 64:
11207 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
11208 DAG.getBitcast(Op.getValueType(), Zero));
11209 break;
11210 case 128:
11211 break; // VSUM over v1i128 should not happen and would be a noop
11212 default:
11213 llvm_unreachable("Unexpected scalar size.");
11214 }
11215 // Cast to original vector type, retrieve last element.
11216 return DAG.getNode(
11217 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
11218 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
11219}
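// Explanatory note (not in the original source): for v16i8/v8i16 inputs two
// VSUM steps are used (element type -> v4i32 -> i128); for v4i32/v2i64 a
// single VSUM to i128 suffices. The sum lands in the low-order lanes, i.e.
// the last element of the original vector type, which is what the
// EXTRACT_VECTOR_ELT above reads.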
11220
11221static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS) {
11222 FunctionType *FT = F->getFunctionType();
11223 const AttributeList &Attrs = F->getAttributes();
11224 if (Attrs.hasRetAttrs())
11225 OS << Attrs.getAsString(AttributeList::ReturnIndex) << " ";
11226 OS << *F->getReturnType() << " @" << F->getName() << "(";
11227 for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
11228 if (I)
11229 OS << ", ";
11230 OS << *FT->getParamType(I);
11231 AttributeSet ArgAttrs = Attrs.getParamAttrs(I);
11232 for (auto A : {Attribute::SExt, Attribute::ZExt, Attribute::NoExt})
11233 if (ArgAttrs.hasAttribute(A))
11234 OS << " " << Attribute::getNameFromAttrKind(A);
11235 }
11236 OS << ")\n";
11237}
11238
11239bool SystemZTargetLowering::isInternal(const Function *Fn) const {
11240 std::map<const Function *, bool>::iterator Itr = IsInternalCache.find(Fn);
11241 if (Itr == IsInternalCache.end())
11242 Itr = IsInternalCache
11243 .insert(std::pair<const Function *, bool>(
11244 Fn, (Fn->hasLocalLinkage() && !Fn->hasAddressTaken())))
11245 .first;
11246 return Itr->second;
11247}
11248
11249void SystemZTargetLowering::
11250verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs,
11251 const Function *F, SDValue Callee) const {
11252 // Temporarily only do the check when explicitly requested, until it can be
11253 // enabled by default.
11254 if (!EnableIntArgExtCheck)
11255 return;
11256
11257 bool IsInternal = false;
11258 const Function *CalleeFn = nullptr;
11259 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
11260 if ((CalleeFn = dyn_cast<Function>(G->getGlobal())))
11261 IsInternal = isInternal(CalleeFn);
11262 if (!IsInternal && !verifyNarrowIntegerArgs(Outs)) {
11263 errs() << "ERROR: Missing extension attribute of passed "
11264 << "value in call to function:\n" << "Callee: ";
11265 if (CalleeFn != nullptr)
11266 printFunctionArgExts(CalleeFn, errs());
11267 else
11268 errs() << "-\n";
11269 errs() << "Caller: ";
11270 printFunctionArgExts(F, errs());
11271 llvm_unreachable("");
11272 }
11273}
11274
11275void SystemZTargetLowering::
11276verifyNarrowIntegerArgs_Ret(const SmallVectorImpl<ISD::OutputArg> &Outs,
11277 const Function *F) const {
11278 // Temporarily only do the check when explicitly requested, until it can be
11279 // enabled by default.
11280 if (!EnableIntArgExtCheck)
11281 return;
11282
11283 if (!isInternal(F) && !verifyNarrowIntegerArgs(Outs)) {
11284 errs() << "ERROR: Missing extension attribute of returned "
11285 << "value from function:\n";
11286 printFunctionArgExts(F, errs());
11287 llvm_unreachable("");
11288 }
11289}
11290
11291// Verify that narrow integer arguments are extended as required by the ABI.
11292// Return false if an error is found.
11293bool SystemZTargetLowering::verifyNarrowIntegerArgs(
11294 const SmallVectorImpl<ISD::OutputArg> &Outs) const {
11295 if (!Subtarget.isTargetELF())
11296 return true;
11297
11298 if (EnableIntArgExtCheck.getNumOccurrences()) {
11299 if (!EnableIntArgExtCheck)
11300 return true;
11301 } else if (!getTargetMachine().Options.VerifyArgABICompliance)
11302 return true;
11303
11304 for (unsigned i = 0; i < Outs.size(); ++i) {
11305 MVT VT = Outs[i].VT;
11306 ISD::ArgFlagsTy Flags = Outs[i].Flags;
11307 if (VT.isInteger()) {
11308 assert((VT == MVT::i32 || VT.getSizeInBits() >= 64) &&
11309 "Unexpected integer argument VT.");
11310 if (VT == MVT::i32 &&
11311 !Flags.isSExt() && !Flags.isZExt() && !Flags.isNoExt())
11312 return false;
11313 }
11314 }
11315
11316 return true;
11317}
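// Editor's note: a hedged restatement (hypothetical helper, not LLVM API) of
// the per-argument rule the loop above enforces for the ELF target: by this
// point narrow integers should have been promoted to i32, and an i32 must
// carry an explicit sext/zext/noext marking, while integers of 64 bits or
// more need no attribute.
struct NarrowArgSketch {
  unsigned Bits;          // size of the argument's value type in bits
  bool SExt, ZExt, NoExt; // extension flags recorded on the outgoing argument
};
static bool narrowIntArgIsCompliant(const NarrowArgSketch &A) {
  if (A.Bits >= 64)
    return true; // wide integers always pass
  return A.Bits == 32 && (A.SExt || A.ZExt || A.NoExt);
}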
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
Function Alias Analysis Results
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
#define Check(C,...)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static bool isSelectPseudo(MachineInstr &MI)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool isUndef(const MachineInstr &MI)
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
uint64_t High
uint64_t IntrinsicInst * II
#define P(N)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file defines the SmallSet class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL, Comparison C, SDValue TrueOp, SDValue FalseOp)
static SmallVector< SDValue, 4 > simplifyAssumingCCVal(SDValue &Val, SDValue &CC, SelectionDAG &DAG)
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static cl::opt< bool > EnableIntArgExtCheck("argext-abi-check", cl::init(false), cl::desc("Verify that narrow int args are properly extended per the " "SystemZ ABI."))
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static SDValue convertToF16(SDValue Op, SelectionDAG &DAG)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask, SelectionDAG &DAG)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static std::pair< SDValue, int > findCCUse(const SDValue &Val)
static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool analyzeArgSplit(const SmallVectorImpl< ArgTy > &Args, SmallVector< CCValAssign, 16 > &ArgLocs, unsigned I, MVT &PartVT, unsigned &NumParts)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG, const SystemZSubtarget &Subtarget, SDValue &Op)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
Value * RHS
Value * LHS
BinaryOperator * Mul
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1549
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1394
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1521
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1339
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1497
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition APInt.h:323
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:859
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
an instruction that atomically reads a memory location, combines it with another value,...
@ Add
*p = old + v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
BinOp getOperation() const
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:361
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI StringRef getNameFromAttrKind(Attribute::AttrKind AttrKind)
LLVM Basic Block Representation.
Definition BasicBlock.h:62
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
LLVM_ABI bool isConstant() const
CCState - This class holds information needed while lowering arguments and return values.
LLVM_ABI void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
LLVM_ABI bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
LLVM_ABI void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
MachineConstantPoolValue * getMachineCPVal() const
const Constant * getConstVal() const
uint64_t getZExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
bool hasAddressTaken(const User **=nullptr, bool IgnoreCallbackUses=false, bool IgnoreAssumeLikeCalls=true, bool IngoreLLVMUsed=false, bool IgnoreARCAttachedCall=false, bool IgnoreCastedDirectCall=false) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
Definition Function.cpp:954
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:765
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:777
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:730
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:636
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
bool hasInternalLinkage() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setMaxCallFrameSize(uint64_t S)
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
void setFlags(SDNodeFlags NewFlags)
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
LLVM_ABI SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
const_iterator begin() const
Definition SmallSet.h:215
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
size_type size() const
Definition SmallSet.h:170
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:472
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:696
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
iterator end() const
Definition StringRef.h:114
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
A SystemZ-specific class detailing special use registers particular for calling conventions.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, const SDLoc &DL, const AsmOperandInfo &Constraint, SelectionDAG &DAG) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CondMergingParams getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs) const override
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
SDValue useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, MVT VT, SDValue Arg, SDLoc DL, SDValue Chain, bool IsStrict) const
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers Particular to z/OS when in 64 bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:233
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:402
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
A raw_ostream that writes to a file descriptor.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:818
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:787
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:168
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:778
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:852
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
@ STRICT_FMINIMUM
Definition ISDOpcodes.h:471
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:879
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:746
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:909
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:992
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:843
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:664
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:786
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same as [SU]ADDO, but for subtraction.
Definition ISDOpcodes.h:352
@ BR_JT
BR_JT - Jumptable branch.
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:795
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:671
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined; 0 Round to 0; 1 Round to nearest, ties to even; 2 Round to ...
Definition ISDOpcodes.h:969
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:703
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:764
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ STRICT_FMAXIMUM
Definition ISDOpcodes.h:470
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:849
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:810
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum/maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:898
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:887
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:726
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:977
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:804
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:464
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:925
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:738
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:958
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:920
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition ISDOpcodes.h:996
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:458
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:162
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:855
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:832
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
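getSetCCInverse and getSetCCSwappedOperands above are the standard helpers for rewriting comparisons during DAG lowering. A minimal sketch of both transforms on an i32 compare; the expected results in the comments follow from the documented semantics:

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"

static void condCodeExamples() {
  llvm::ISD::CondCode CC = llvm::ISD::SETLT;
  // !(X < Y) is (X >= Y).
  llvm::ISD::CondCode Inverse = llvm::ISD::getSetCCInverse(CC, llvm::MVT::i32);
  // (Y < X) is (X > Y).
  llvm::ISD::CondCode Swapped = llvm::ISD::getSetCCSwappedOperands(CC);
  (void)Inverse;
  (void)Swapped;
}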
Flag
These should be considered private to the implementation of the MCInstrDesc class.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
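The m_And, m_APInt, and m_Value matchers above compose into declarative checks on IR. A short sketch, assuming a valid llvm::Value pointer, that recognizes an 'and X, C' with a constant right-hand side and binds the constant; the helper name is illustrative:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

// Hypothetical helper: true if V is (and X, constant); C is bound to the constant.
static bool isAndWithConstant(llvm::Value *V, const llvm::APInt *&C) {
  using namespace llvm::PatternMatch;
  llvm::Value *X = nullptr;
  return match(V, m_And(m_Value(X), m_APInt(C)));
}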
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned VR16Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned FP16Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition SystemZ.h:41
static bool isImmHH(uint64_t Val)
Definition SystemZ.h:177
const unsigned CCMASK_TEND
Definition SystemZ.h:98
const unsigned CCMASK_CS_EQ
Definition SystemZ.h:68
const unsigned CCMASK_TBEGIN
Definition SystemZ.h:93
const unsigned CCMASK_0
Definition SystemZ.h:28
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition SystemZ.h:83
const unsigned CCMASK_LOGICAL_CARRY
Definition SystemZ.h:61
const unsigned TDCMASK_NORMAL_MINUS
Definition SystemZ.h:123
const unsigned CCMASK_TDC
Definition SystemZ.h:110
const unsigned CCMASK_FCMP
Definition SystemZ.h:49
const unsigned CCMASK_TM_SOME_0
Definition SystemZ.h:82
static bool isImmHL(uint64_t Val)
Definition SystemZ.h:172
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition SystemZ.h:125
const unsigned PFD_READ
Definition SystemZ.h:116
const unsigned CCMASK_1
Definition SystemZ.h:29
const unsigned TDCMASK_NORMAL_PLUS
Definition SystemZ.h:122
const unsigned PFD_WRITE
Definition SystemZ.h:117
const unsigned CCMASK_CMP_GT
Definition SystemZ.h:38
const unsigned TDCMASK_QNAN_MINUS
Definition SystemZ.h:129
const unsigned CCMASK_CS
Definition SystemZ.h:70
const unsigned CCMASK_ANY
Definition SystemZ.h:32
const unsigned CCMASK_ARITH
Definition SystemZ.h:56
const unsigned CCMASK_TM_MIXED_MSB_0
Definition SystemZ.h:79
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition SystemZ.h:124
static bool isImmLL(uint64_t Val)
Definition SystemZ.h:162
const unsigned VectorBits
Definition SystemZ.h:155
static bool isImmLH(uint64_t Val)
Definition SystemZ.h:167
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition SystemZ.h:126
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition SystemZ.h:78
const unsigned IPM_CC
Definition SystemZ.h:113
const unsigned CCMASK_CMP_LE
Definition SystemZ.h:40
const unsigned CCMASK_CMP_O
Definition SystemZ.h:45
const unsigned CCMASK_CMP_EQ
Definition SystemZ.h:36
const unsigned VectorBytes
Definition SystemZ.h:159
const unsigned TDCMASK_INFINITY_MINUS
Definition SystemZ.h:127
const unsigned CCMASK_ICMP
Definition SystemZ.h:48
const unsigned CCMASK_VCMP_ALL
Definition SystemZ.h:102
const unsigned CCMASK_VCMP_NONE
Definition SystemZ.h:104
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition SystemZ.h:105
const unsigned CCMASK_TM_MIXED_MSB_1
Definition SystemZ.h:80
const unsigned CCMASK_TM_MSB_0
Definition SystemZ.h:84
const unsigned CCMASK_ARITH_OVERFLOW
Definition SystemZ.h:55
const unsigned CCMASK_CS_NE
Definition SystemZ.h:69
const unsigned TDCMASK_SNAN_PLUS
Definition SystemZ.h:130
const unsigned CCMASK_TM
Definition SystemZ.h:86
const unsigned CCMASK_3
Definition SystemZ.h:31
const unsigned CCMASK_NONE
Definition SystemZ.h:27
const unsigned CCMASK_CMP_LT
Definition SystemZ.h:37
const unsigned CCMASK_CMP_NE
Definition SystemZ.h:39
const unsigned TDCMASK_ZERO_PLUS
Definition SystemZ.h:120
const unsigned TDCMASK_QNAN_PLUS
Definition SystemZ.h:128
const unsigned TDCMASK_ZERO_MINUS
Definition SystemZ.h:121
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition SystemZ.h:81
const unsigned CCMASK_LOGICAL_BORROW
Definition SystemZ.h:63
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition SystemZ.h:44
const unsigned CCMASK_LOGICAL
Definition SystemZ.h:65
const unsigned CCMASK_TM_MSB_1
Definition SystemZ.h:85
const unsigned TDCMASK_SNAN_MINUS
Definition SystemZ.h:131
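The isImmLL/isImmLH/isImmHL/isImmHH predicates above test whether a 64-bit immediate fits entirely in one 16-bit quarter of the value (LL = bits 0-15, LH = bits 16-31, HL = bits 32-47, HH = bits 48-63). A standalone sketch of that semantics (not the in-tree implementation); the helper name is hypothetical:

#include <cstdint>

// Sketch of the halfword-immediate predicates: true when Val has no bits
// set outside the selected 16-bit quarter of a 64-bit value.
static bool fitsInQuarter(uint64_t Val, unsigned Quarter) {
  return (Val & ~(uint64_t(0xffff) << (Quarter * 16))) == 0;
}

// fitsInQuarter(V, 0) mirrors isImmLL, fitsInQuarter(V, 1) mirrors isImmLH,
// fitsInQuarter(V, 2) mirrors isImmHL, and fitsInQuarter(V, 3) mirrors isImmHH.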
initializer< Ty > init(const Ty &Val)
support::ulittle32_t Word
Definition IRSymtab.h:53
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:532
@ Length
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
@ Done
Definition Threading.h:60
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
constexpr T maskLeadingOnes(unsigned N)
Create a bitmask with the N left-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:88
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert ‘Bytes’ to a hex string and output to ‘OS’.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most, stopping at the first 1.
Definition bit.h:202
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least, stopping at the first 1.
Definition bit.h:236
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
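Several of the llvm namespace helpers above (isInt, isUIntN, maskTrailingOnes, countr_zero, isPowerOf2_32, SignExtend64) are small bit-manipulation utilities that the lowering code leans on for immediate-range and mask checks. A short sketch of typical uses; the results in the comments follow from the documented semantics:

#include <cstdint>
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"

static void bitHelperExamples() {
  bool FitsI16 = llvm::isInt<16>(-30000);                 // true: in [-32768, 32767]
  bool FitsU12 = llvm::isUIntN(12, 4096);                 // false: 4096 needs 13 bits
  uint64_t LowByte = llvm::maskTrailingOnes<uint64_t>(8); // 0xff
  int TrailingZeros = llvm::countr_zero(0x100u);          // 8
  bool Pow2 = llvm::isPowerOf2_32(64);                    // true
  int64_t Extended = llvm::SignExtend64<8>(0x80);         // -128
  (void)FitsI16; (void)FitsU12; (void)LowByte;
  (void)TrailingZeros; (void)Pow2; (void)Extended;
}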
#define N
#define EQ(a, b)
Definition regexec.c:65
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:248
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
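The EVT accessors listed above are how the lowering code reasons about machine value types. A minimal sketch, assuming a live LLVMContext, that constructs a v4i32 type and queries it; the expected values appear in the comments:

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"

static void evtExample(llvm::LLVMContext &Ctx) {
  llvm::EVT EltVT = llvm::EVT::getIntegerVT(Ctx, 32);
  llvm::EVT VecVT = llvm::EVT::getVectorVT(Ctx, EltVT, 4); // v4i32
  bool IsVec = VecVT.isVector();                           // true
  unsigned NumElts = VecVT.getVectorNumElements();         // 4
  uint64_t Bits = VecVT.getSizeInBits().getFixedValue();   // 128
  (void)IsVec; (void)NumElts; (void)Bits;
}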
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:189
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:175
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:314
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:183
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:148
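KnownBits tracks which bits of a value are provably zero or one; zext, sext, intersectWith, and getMaxValue above manipulate that information. A short standalone sketch for an 8-bit value whose top nibble is known zero, then zero-extended to 32 bits:

#include "llvm/Support/KnownBits.h"

static void knownBitsExample() {
  llvm::KnownBits Byte(8);
  Byte.Zero.setHighBits(4);              // bits 4-7 known 0, so the value is <= 0x0f
  llvm::KnownBits Wide = Byte.zext(32);  // the new high bits become known 0 as well
  llvm::APInt Max = Wide.getMaxValue();  // 0x0000000f
  (void)Max;
}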
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
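The CallLoweringInfo setters above are designed to be chained when a target builds an outgoing call. A heavily simplified sketch of that pattern, wrapped in a hypothetical helper so the inputs are explicit; the exact set of setters varies by call site:

#include <utility>
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"

// Hypothetical helper showing the chained-setter pattern; all inputs are
// supplied by the caller.
static std::pair<llvm::SDValue, llvm::SDValue>
emitSimpleCall(const llvm::TargetLowering &TLI, llvm::SelectionDAG &DAG,
               const llvm::SDLoc &DL, llvm::SDValue Chain, llvm::Type *RetTy,
               llvm::SDValue Callee, llvm::TargetLowering::ArgListTy Args) {
  llvm::TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setCallee(llvm::CallingConv::C, RetTy, Callee, std::move(Args))
      .setSExtResult(true);
  // Returns {call result, output chain}.
  return TLI.LowerCallTo(CLI);
}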
This structure is used to pass arguments to makeLibCall function.