SystemZISelLowering.cpp
1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
23#include "llvm/IR/GlobalAlias.h"
25#include "llvm/IR/Intrinsics.h"
26#include "llvm/IR/IntrinsicsS390.h"
30#include <cctype>
31#include <optional>
32
33using namespace llvm;
34
35#define DEBUG_TYPE "systemz-lower"
36
37namespace {
38// Represents information about a comparison.
39struct Comparison {
40 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
41 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
42 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
43
44 // The operands to the comparison.
45 SDValue Op0, Op1;
46
47 // Chain if this is a strict floating-point comparison.
48 SDValue Chain;
49
50 // The opcode that should be used to compare Op0 and Op1.
51 unsigned Opcode;
52
53 // A SystemZICMP value. Only used for integer comparisons.
54 unsigned ICmpType;
55
56 // The mask of CC values that Opcode can produce.
57 unsigned CCValid;
58
59 // The mask of CC values for which the original condition is true.
60 unsigned CCMask;
61};
62} // end anonymous namespace
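// Illustrative note (not part of the upstream source): for a signed 32-bit
// equality test, the lowering code would typically fill this struct in along
// the lines of
//   C.Opcode  = SystemZISD::ICMP;      C.ICmpType = SystemZICMP::SignedOnly;
//   C.CCValid = SystemZ::CCMASK_ICMP;  C.CCMask   = SystemZ::CCMASK_CMP_EQ;
// The exact enumerators above are stated as assumptions for illustration.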
63
64// Classify VT as either 32 or 64 bit.
65static bool is32Bit(EVT VT) {
66 switch (VT.getSimpleVT().SimpleTy) {
67 case MVT::i32:
68 return true;
69 case MVT::i64:
70 return false;
71 default:
72 llvm_unreachable("Unsupported type");
73 }
74}
75
76// Return a version of MachineOperand that can be safely used before the
77// final use.
79 if (Op.isReg())
80 Op.setIsKill(false);
81 return Op;
82}
83
85 const SystemZSubtarget &STI)
86 : TargetLowering(TM), Subtarget(STI) {
87 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
88
89 auto *Regs = STI.getSpecialRegisters();
90
91 // Set up the register classes.
92 if (Subtarget.hasHighWord())
93 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
94 else
95 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
96 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
97 if (!useSoftFloat()) {
98 if (Subtarget.hasVector()) {
99 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
100 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
101 } else {
102 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
103 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
104 }
105 if (Subtarget.hasVectorEnhancements1())
106 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
107 else
108 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
109
110 if (Subtarget.hasVector()) {
111 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
112 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
113 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
114 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
115 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
116 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
117 }
118
119 if (Subtarget.hasVector())
120 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
121 }
122
123 // Compute derived properties from the register classes
125
126 // Set up special registers.
127 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
128
129 // TODO: It may be better to default to latency-oriented scheduling, however
130 // LLVM's current latency-oriented scheduler can't handle physreg definitions
131 // such as SystemZ has with CC, so set this to the register-pressure
132 // scheduler, because it can.
134
137
139
140 // Instructions are strings of 2-byte aligned 2-byte values.
142 // For performance reasons we prefer 16-byte alignment.
144
145 // Handle operations that are handled in a similar way for all types.
146 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
147 I <= MVT::LAST_FP_VALUETYPE;
148 ++I) {
150 if (isTypeLegal(VT)) {
151 // Lower SET_CC into an IPM-based sequence.
155
156 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
158
159 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
162 }
163 }
164
165 // Expand jump table branches as address arithmetic followed by an
166 // indirect jump.
168
169 // Expand BRCOND into a BR_CC (see above).
171
172 // Handle integer types except i128.
173 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
174 I <= MVT::LAST_INTEGER_VALUETYPE;
175 ++I) {
177 if (isTypeLegal(VT) && VT != MVT::i128) {
179
180 // Expand individual DIV and REMs into DIVREMs.
187
188 // Support addition/subtraction with overflow.
191
192 // Support addition/subtraction with carry.
195
196 // Support carry in as value rather than glue.
199
200 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
201 // available, or if the operand is constant.
203
204 // Use POPCNT on z196 and above.
205 if (Subtarget.hasPopulationCount())
207 else
209
210 // No special instructions for these.
213
214 // Use *MUL_LOHI where possible instead of MULH*.
219
220 // Only z196 and above have native support for conversions to unsigned.
221 // On z10, promoting to i64 doesn't generate an inexact condition for
222 // values that are outside the i32 range but in the i64 range, so use
223 // the default expansion.
224 if (!Subtarget.hasFPExtension())
226
227 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
228 // default to Expand, so need to be modified to Legal where appropriate.
230 if (Subtarget.hasFPExtension())
232
233 // And similarly for STRICT_[SU]INT_TO_FP.
235 if (Subtarget.hasFPExtension())
237 }
238 }
239
240 // Handle i128 if legal.
241 if (isTypeLegal(MVT::i128)) {
242 // No special instructions for these.
258
259 // Support addition/subtraction with carry.
264
265 // Use VPOPCT and add up partial results.
267
268 // We have to use libcalls for these.
277 }
278
279 // Type legalization will convert 8- and 16-bit atomic operations into
280 // forms that operate on i32s (but still keeping the original memory VT).
281 // Lower them into full i32 operations.
293
294 // Whether or not i128 is a legal type, we need to custom lower
295 // the atomic operations in order to exploit SystemZ instructions.
300
301 // Mark sign/zero extending atomic loads as legal, which will make
302 // DAGCombiner fold extensions into atomic loads if possible.
304 {MVT::i8, MVT::i16, MVT::i32}, Legal);
306 {MVT::i8, MVT::i16}, Legal);
308 MVT::i8, Legal);
309
310 // We can use the CC result of compare-and-swap to implement
311 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
315
317
318 // Traps are legal, as we will convert them to "j .+2".
319 setOperationAction(ISD::TRAP, MVT::Other, Legal);
320
321 // z10 has instructions for signed but not unsigned FP conversion.
322 // Handle unsigned 32-bit types as signed 64-bit types.
323 if (!Subtarget.hasFPExtension()) {
328 }
329
330 // We have native support for a 64-bit CTLZ, via FLOGR.
334
335 // On z15 we have native support for a 64-bit CTPOP.
336 if (Subtarget.hasMiscellaneousExtensions3()) {
339 }
340
341 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
343
344 // Expand 128 bit shifts without using a libcall.
348 setLibcallName(RTLIB::SRL_I128, nullptr);
349 setLibcallName(RTLIB::SHL_I128, nullptr);
350 setLibcallName(RTLIB::SRA_I128, nullptr);
351
352 // Also expand 256 bit shifts if i128 is a legal type.
353 if (isTypeLegal(MVT::i128)) {
357 }
358
359 // Handle bitcast from fp128 to i128.
360 if (!isTypeLegal(MVT::i128))
362
363 // We have native instructions for i8, i16 and i32 extensions, but not i1.
365 for (MVT VT : MVT::integer_valuetypes()) {
369 }
370
371 // Handle the various types of symbolic address.
377
378 // We need to handle dynamic allocations specially because of the
379 // 160-byte area at the bottom of the stack.
382
385
386 // Handle prefetches with PFD or PFDRL.
388
389 // Handle readcyclecounter with STCKF.
391
393 // Assume by default that all vector operations need to be expanded.
394 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
395 if (getOperationAction(Opcode, VT) == Legal)
396 setOperationAction(Opcode, VT, Expand);
397
398 // Likewise all truncating stores and extending loads.
399 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
400 setTruncStoreAction(VT, InnerVT, Expand);
403 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
404 }
405
406 if (isTypeLegal(VT)) {
407 // These operations are legal for anything that can be stored in a
408 // vector register, even if there is no native support for the format
409 // as such. In particular, we can do these for v4f32 even though there
410 // are no specific instructions for that format.
416
417 // Likewise, except that we need to replace the nodes with something
418 // more specific.
421 }
422 }
423
424 // Handle integer vector types.
426 if (isTypeLegal(VT)) {
427 // These operations have direct equivalents.
432 if (VT != MVT::v2i64)
438 if (Subtarget.hasVectorEnhancements1())
440 else
444
445 // Convert a GPR scalar to a vector by inserting it into element 0.
447
448 // Use a series of unpacks for extensions.
451
452 // Detect shifts/rotates by a scalar amount and convert them into
453 // V*_BY_SCALAR.
458
459 // Add ISD::VECREDUCE_ADD as custom in order to implement
460 // it with VZERO+VSUM
462
463 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
464 // and inverting the result as necessary.
466 }
467 }
468
469 if (Subtarget.hasVector()) {
470 // There should be no need to check for float types other than v2f64
471 // since <2 x f32> isn't a legal type.
480
489 }
490
491 if (Subtarget.hasVectorEnhancements2()) {
500
509 }
510
511 // Handle floating-point types.
512 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
513 I <= MVT::LAST_FP_VALUETYPE;
514 ++I) {
516 if (isTypeLegal(VT)) {
517 // We can use FI for FRINT.
519
520 // We can use the extended form of FI for other rounding operations.
521 if (Subtarget.hasFPExtension()) {
527 }
528
529 // No special instructions for these.
535
536 // Special treatment.
538
539 // Handle constrained floating-point operations.
549 if (Subtarget.hasFPExtension()) {
555 }
556 }
557 }
558
559 // Handle floating-point vector types.
560 if (Subtarget.hasVector()) {
561 // Scalar-to-vector conversion is just a subreg.
564
565 // Some insertions and extractions can be done directly but others
566 // need to go via integers.
571
572 // These operations have direct equivalents.
573 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
574 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
575 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
576 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
577 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
578 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
579 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
580 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
581 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
584 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
587
588 // Handle constrained floating-point operations.
601
606 if (Subtarget.hasVectorEnhancements1()) {
609 }
610 }
611
612 // The vector enhancements facility 1 has instructions for these.
613 if (Subtarget.hasVectorEnhancements1()) {
614 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
615 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
616 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
617 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
618 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
619 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
620 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
621 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
622 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
625 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
628
633
638
643
648
653
654 // Handle constrained floating-point operations.
667 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
668 MVT::v4f32, MVT::v2f64 }) {
673 }
674 }
675
676 // We only have fused f128 multiply-addition on vector registers.
677 if (!Subtarget.hasVectorEnhancements1()) {
680 }
681
682 // We don't have a copysign instruction on vector registers.
683 if (Subtarget.hasVectorEnhancements1())
685
686 // Needed so that we don't try to implement f128 constant loads using
687 // a load-and-extend of an f80 constant (in cases where the constant
688 // would fit in an f80).
689 for (MVT VT : MVT::fp_valuetypes())
690 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
691
692 // We don't have extending load instruction on vector registers.
693 if (Subtarget.hasVectorEnhancements1()) {
694 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
695 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
696 }
697
698 // Floating-point truncation and stores need to be done separately.
699 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
700 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
701 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
702
703 // We have 64-bit FPR<->GPR moves, but need special handling for
704 // 32-bit forms.
705 if (!Subtarget.hasVector()) {
708 }
709
710 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
711 // structure, but VAEND is a no-op.
715
717
718 // Codes for which we want to perform some z-specific combinations.
722 ISD::LOAD,
733 ISD::SDIV,
734 ISD::UDIV,
735 ISD::SREM,
736 ISD::UREM,
739
740 // Handle intrinsics.
743
744 // We want to use MVC in preference to even a single load/store pair.
745 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
747
748 // The main memset sequence is a byte store followed by an MVC.
749 // Two STC or MV..I stores win over that, but the kind of fused stores
750 // generated by target-independent code don't win when the byte value is
751 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
752 // than "STC;MVC". Handle the choice in target-specific code instead.
753 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
755
756 // Default to having -disable-strictnode-mutation on
757 IsStrictFPEnabled = true;
758
759 if (Subtarget.isTargetzOS()) {
760 struct RTLibCallMapping {
761 RTLIB::Libcall Code;
762 const char *Name;
763 };
764 static RTLibCallMapping RTLibCallCommon[] = {
765#define HANDLE_LIBCALL(code, name) {RTLIB::code, name},
766#include "ZOSLibcallNames.def"
767 };
768 for (auto &E : RTLibCallCommon)
769 setLibcallName(E.Code, E.Name);
770 }
771}
772
774 return Subtarget.hasSoftFloat();
775}
776
778 LLVMContext &, EVT VT) const {
779 if (!VT.isVector())
780 return MVT::i32;
782}
783
785 const MachineFunction &MF, EVT VT) const {
786 VT = VT.getScalarType();
787
788 if (!VT.isSimple())
789 return false;
790
791 switch (VT.getSimpleVT().SimpleTy) {
792 case MVT::f32:
793 case MVT::f64:
794 return true;
795 case MVT::f128:
796 return Subtarget.hasVectorEnhancements1();
797 default:
798 break;
799 }
800
801 return false;
802}
803
804// Return true if the constant can be generated with a vector instruction,
805// such as VGM, VGMB or VREPI.
807 const SystemZSubtarget &Subtarget) {
808 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
809 if (!Subtarget.hasVector() ||
810 (isFP128 && !Subtarget.hasVectorEnhancements1()))
811 return false;
812
813 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
814 // preferred way of creating all-zero and all-one vectors so give it
815 // priority over other methods below.
816 unsigned Mask = 0;
817 unsigned I = 0;
818 for (; I < SystemZ::VectorBytes; ++I) {
819 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
820 if (Byte == 0xff)
821 Mask |= 1ULL << I;
822 else if (Byte != 0)
823 break;
824 }
825 if (I == SystemZ::VectorBytes) {
827 OpVals.push_back(Mask);
829 return true;
830 }
831
832 if (SplatBitSize > 64)
833 return false;
834
835 auto tryValue = [&](uint64_t Value) -> bool {
836 // Try VECTOR REPLICATE IMMEDIATE
837 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
838 if (isInt<16>(SignedValue)) {
839 OpVals.push_back(((unsigned) SignedValue));
842 SystemZ::VectorBits / SplatBitSize);
843 return true;
844 }
845 // Try VECTOR GENERATE MASK
846 unsigned Start, End;
847 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
848 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
849 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
850 // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
851 OpVals.push_back(Start - (64 - SplatBitSize));
852 OpVals.push_back(End - (64 - SplatBitSize));
855 SystemZ::VectorBits / SplatBitSize);
856 return true;
857 }
858 return false;
859 };
860
861 // First try assuming that any undefined bits above the highest set bit
862 // and below the lowest set bit are 1s. This increases the likelihood of
863 // being able to use a sign-extended element value in VECTOR REPLICATE
864 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
865 uint64_t SplatBitsZ = SplatBits.getZExtValue();
866 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
867 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
868 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
869 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
870 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
871 if (tryValue(SplatBitsZ | Upper | Lower))
872 return true;
873
874 // Now try assuming that any undefined bits between the first and
875 // last defined set bits are set. This increases the chances of
876 // using a non-wraparound mask.
877 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
878 return tryValue(SplatBitsZ | Middle);
879}
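// Example, given as an assumption for illustration: an all-ones 128-bit
// immediate makes every Byte equal 0xff, so Mask becomes 0xffff and the
// constant can be emitted with a single VECTOR GENERATE BYTE MASK, while a
// small per-element value such as 1 instead succeeds inside tryValue() via
// VECTOR REPLICATE IMMEDIATE.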
880
882 if (IntImm.isSingleWord()) {
883 IntBits = APInt(128, IntImm.getZExtValue());
884 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
885 } else
886 IntBits = IntImm;
887 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
888
889 // Find the smallest splat.
890 SplatBits = IntImm;
891 unsigned Width = SplatBits.getBitWidth();
892 while (Width > 8) {
893 unsigned HalfSize = Width / 2;
894 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
895 APInt LowValue = SplatBits.trunc(HalfSize);
896
897 // If the two halves do not match, stop here.
898 if (HighValue != LowValue || 8 > HalfSize)
899 break;
900
901 SplatBits = HighValue;
902 Width = HalfSize;
903 }
904 SplatUndef = 0;
905 SplatBitSize = Width;
906}
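// Worked example (illustrative): a 128-bit immediate made of the byte 0x01
// repeated sixteen times halves repeatedly until SplatBits == 0x01 and
// SplatBitSize == 8, since every halving step finds two matching halves.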
907
909 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
910 bool HasAnyUndefs;
911
912 // Get IntBits by finding the 128 bit splat.
913 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
914 true);
915
916 // Get SplatBits by finding the 8 bit or greater splat.
917 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
918 true);
919}
920
922 bool ForCodeSize) const {
923 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
924 if (Imm.isZero() || Imm.isNegZero())
925 return true;
926
928}
929
930/// Returns true if stack probing through inline assembly is requested.
932 // If the function specifically requests inline stack probes, emit them.
933 if (MF.getFunction().hasFnAttribute("probe-stack"))
934 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
935 "inline-asm";
936 return false;
937}
938
942}
943
947}
948
951 // Don't expand subword operations as they require special treatment.
952 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
954
955 // Don't expand if there is a target instruction available.
956 if (Subtarget.hasInterlockedAccess1() &&
957 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
964
966}
967
969 // We can use CGFI or CLGFI.
970 return isInt<32>(Imm) || isUInt<32>(Imm);
971}
972
974 // We can use ALGFI or SLGFI.
975 return isUInt<32>(Imm) || isUInt<32>(-Imm);
976}
977
979 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
980 // Unaligned accesses should never be slower than the expanded version.
981 // We check specifically for aligned accesses in the few cases where
982 // they are required.
983 if (Fast)
984 *Fast = 1;
985 return true;
986}
987
988// Information about the addressing mode for a memory access.
990 // True if a long displacement is supported.
992
993 // True if use of index register is supported.
995
996 AddressingMode(bool LongDispl, bool IdxReg) :
997 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
998};
999
1000// Return the desired addressing mode for a Load which has only one use (in
1001// the same block) which is a Store.
1003 Type *Ty) {
1004 // With vector support a Load->Store combination may be combined to either
1005 // an MVC or vector operations and it seems to work best to allow the
1006 // vector addressing mode.
1007 if (HasVector)
1008 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1009
1010 // Otherwise only the MVC case is special.
1011 bool MVC = Ty->isIntegerTy(8);
1012 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1013}
1014
1015// Return the addressing mode which seems most desirable given an LLVM
1016// Instruction pointer.
1017static AddressingMode
1019 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1020 switch (II->getIntrinsicID()) {
1021 default: break;
1022 case Intrinsic::memset:
1023 case Intrinsic::memmove:
1024 case Intrinsic::memcpy:
1025 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1026 }
1027 }
1028
1029 if (isa<LoadInst>(I) && I->hasOneUse()) {
1030 auto *SingleUser = cast<Instruction>(*I->user_begin());
1031 if (SingleUser->getParent() == I->getParent()) {
1032 if (isa<ICmpInst>(SingleUser)) {
1033 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1034 if (C->getBitWidth() <= 64 &&
1035 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1036 // Comparison of memory with 16 bit signed / unsigned immediate
1037 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1038 } else if (isa<StoreInst>(SingleUser))
1039 // Load->Store
1040 return getLoadStoreAddrMode(HasVector, I->getType());
1041 }
1042 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1043 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1044 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1045 // Load->Store
1046 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1047 }
1048
1049 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1050
1051 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1052 // dependencies (LDE only supports small offsets).
1053 // * Utilize the vector registers to hold floating point
1054 // values (vector load / store instructions only support small
1055 // offsets).
1056
1057 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1058 I->getOperand(0)->getType());
1059 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1060 bool IsVectorAccess = MemAccessTy->isVectorTy();
1061
1062 // A store of an extracted vector element will be combined into a VSTE type
1063 // instruction.
1064 if (!IsVectorAccess && isa<StoreInst>(I)) {
1065 Value *DataOp = I->getOperand(0);
1066 if (isa<ExtractElementInst>(DataOp))
1067 IsVectorAccess = true;
1068 }
1069
1070 // A load which gets inserted into a vector element will be combined into a
1071 // VLE type instruction.
1072 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1073 User *LoadUser = *I->user_begin();
1074 if (isa<InsertElementInst>(LoadUser))
1075 IsVectorAccess = true;
1076 }
1077
1078 if (IsFPAccess || IsVectorAccess)
1079 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1080 }
1081
1082 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1083}
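// Rough summary of the returned modes, stated here as an illustration rather
// than taken verbatim from the source:
//  * loads/stores feeding memset/memmove/memcpy -> no long displacement, no index
//  * FP or vector accesses with vector support  -> short displacement, index allowed
//  * everything else                            -> long displacement and index allowed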
1084
1086 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1087 // Punt on globals for now, although they can be used in limited
1088 // RELATIVE LONG cases.
1089 if (AM.BaseGV)
1090 return false;
1091
1092 // Require a 20-bit signed offset.
1093 if (!isInt<20>(AM.BaseOffs))
1094 return false;
1095
1096 bool RequireD12 =
1097 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1098 AddressingMode SupportedAM(!RequireD12, true);
1099 if (I != nullptr)
1100 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1101
1102 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1103 return false;
1104
1105 if (!SupportedAM.IndexReg)
1106 // No indexing allowed.
1107 return AM.Scale == 0;
1108 else
1109 // Indexing is OK but no scale factor can be applied.
1110 return AM.Scale == 0 || AM.Scale == 1;
1111}
1112
1114 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
1115 unsigned SrcAS, const AttributeList &FuncAttributes) const {
1116 const int MVCFastLen = 16;
1117
1118 if (Limit != ~unsigned(0)) {
1119 // Don't expand Op into scalar loads/stores in these cases:
1120 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1121 return false; // Small memcpy: Use MVC
1122 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1123 return false; // Small memset (first byte with STC/MVI): Use MVC
1124 if (Op.isZeroMemset())
1125 return false; // Memset zero: Use XC
1126 }
1127
1128 return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
1129 SrcAS, FuncAttributes);
1130}
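// As an illustration (assumed sizes): a 12-byte memcpy that allows
// overlapping accesses stays at or below MVCFastLen (16) and is therefore
// left for the MVC-based lowering instead of being expanded into scalar
// loads and stores.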
1131
1133 const AttributeList &FuncAttributes) const {
1134 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1135}
1136
1137bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1138 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1139 return false;
1140 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1141 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1142 return FromBits > ToBits;
1143}
1144
1146 if (!FromVT.isInteger() || !ToVT.isInteger())
1147 return false;
1148 unsigned FromBits = FromVT.getFixedSizeInBits();
1149 unsigned ToBits = ToVT.getFixedSizeInBits();
1150 return FromBits > ToBits;
1151}
1152
1153//===----------------------------------------------------------------------===//
1154// Inline asm support
1155//===----------------------------------------------------------------------===//
1156
1159 if (Constraint.size() == 1) {
1160 switch (Constraint[0]) {
1161 case 'a': // Address register
1162 case 'd': // Data register (equivalent to 'r')
1163 case 'f': // Floating-point register
1164 case 'h': // High-part register
1165 case 'r': // General-purpose register
1166 case 'v': // Vector register
1167 return C_RegisterClass;
1168
1169 case 'Q': // Memory with base and unsigned 12-bit displacement
1170 case 'R': // Likewise, plus an index
1171 case 'S': // Memory with base and signed 20-bit displacement
1172 case 'T': // Likewise, plus an index
1173 case 'm': // Equivalent to 'T'.
1174 return C_Memory;
1175
1176 case 'I': // Unsigned 8-bit constant
1177 case 'J': // Unsigned 12-bit constant
1178 case 'K': // Signed 16-bit constant
1179 case 'L': // Signed 20-bit displacement (on all targets we support)
1180 case 'M': // 0x7fffffff
1181 return C_Immediate;
1182
1183 default:
1184 break;
1185 }
1186 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1187 switch (Constraint[1]) {
1188 case 'Q': // Address with base and unsigned 12-bit displacement
1189 case 'R': // Likewise, plus an index
1190 case 'S': // Address with base and signed 20-bit displacement
1191 case 'T': // Likewise, plus an index
1192 return C_Address;
1193
1194 default:
1195 break;
1196 }
1197 }
1198 return TargetLowering::getConstraintType(Constraint);
1199}
1200
1203 const char *constraint) const {
1205 Value *CallOperandVal = info.CallOperandVal;
1206 // If we don't have a value, we can't do a match,
1207 // but allow it at the lowest weight.
1208 if (!CallOperandVal)
1209 return CW_Default;
1210 Type *type = CallOperandVal->getType();
1211 // Look at the constraint type.
1212 switch (*constraint) {
1213 default:
1215 break;
1216
1217 case 'a': // Address register
1218 case 'd': // Data register (equivalent to 'r')
1219 case 'h': // High-part register
1220 case 'r': // General-purpose register
1221 weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1222 break;
1223
1224 case 'f': // Floating-point register
1225 if (!useSoftFloat())
1226 weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1227 break;
1228
1229 case 'v': // Vector register
1230 if (Subtarget.hasVector())
1231 weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1232 : CW_Default;
1233 break;
1234
1235 case 'I': // Unsigned 8-bit constant
1236 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1237 if (isUInt<8>(C->getZExtValue()))
1238 weight = CW_Constant;
1239 break;
1240
1241 case 'J': // Unsigned 12-bit constant
1242 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1243 if (isUInt<12>(C->getZExtValue()))
1244 weight = CW_Constant;
1245 break;
1246
1247 case 'K': // Signed 16-bit constant
1248 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1249 if (isInt<16>(C->getSExtValue()))
1250 weight = CW_Constant;
1251 break;
1252
1253 case 'L': // Signed 20-bit displacement (on all targets we support)
1254 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1255 if (isInt<20>(C->getSExtValue()))
1256 weight = CW_Constant;
1257 break;
1258
1259 case 'M': // 0x7fffffff
1260 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1261 if (C->getZExtValue() == 0x7fffffff)
1262 weight = CW_Constant;
1263 break;
1264 }
1265 return weight;
1266}
1267
1268// Parse a "{tNNN}" register constraint for which the register type "t"
1269// has already been verified. MC is the class associated with "t" and
1270// Map maps 0-based register numbers to LLVM register numbers.
1271static std::pair<unsigned, const TargetRegisterClass *>
1273 const unsigned *Map, unsigned Size) {
1274 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1275 if (isdigit(Constraint[2])) {
1276 unsigned Index;
1277 bool Failed =
1278 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1279 if (!Failed && Index < Size && Map[Index])
1280 return std::make_pair(Map[Index], RC);
1281 }
1282 return std::make_pair(0U, nullptr);
1283}
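// Hypothetical usage sketch (the map name and size below are assumptions,
// not taken from the elided call sites): for the constraint "{r5}" a call
// such as
//   parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
//                       SystemZMC::GR64Regs, 16)
// would return {SystemZ::R5D, &SystemZ::GR64BitRegClass}.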
1284
1285std::pair<unsigned, const TargetRegisterClass *>
1287 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1288 if (Constraint.size() == 1) {
1289 // GCC Constraint Letters
1290 switch (Constraint[0]) {
1291 default: break;
1292 case 'd': // Data register (equivalent to 'r')
1293 case 'r': // General-purpose register
1294 if (VT.getSizeInBits() == 64)
1295 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1296 else if (VT.getSizeInBits() == 128)
1297 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1298 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1299
1300 case 'a': // Address register
1301 if (VT == MVT::i64)
1302 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1303 else if (VT == MVT::i128)
1304 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1305 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1306
1307 case 'h': // High-part register (an LLVM extension)
1308 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1309
1310 case 'f': // Floating-point register
1311 if (!useSoftFloat()) {
1312 if (VT.getSizeInBits() == 64)
1313 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1314 else if (VT.getSizeInBits() == 128)
1315 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1316 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1317 }
1318 break;
1319
1320 case 'v': // Vector register
1321 if (Subtarget.hasVector()) {
1322 if (VT.getSizeInBits() == 32)
1323 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1324 if (VT.getSizeInBits() == 64)
1325 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1326 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1327 }
1328 break;
1329 }
1330 }
1331 if (Constraint.starts_with("{")) {
1332
1333 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1334 // to check the size on.
1335 auto getVTSizeInBits = [&VT]() {
1336 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1337 };
1338
1339 // We need to override the default register parsing for GPRs and FPRs
1340 // because the interpretation depends on VT. The internal names of
1341 // the registers are also different from the external names
1342 // (F0D and F0S instead of F0, etc.).
1343 if (Constraint[1] == 'r') {
1344 if (getVTSizeInBits() == 32)
1345 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1347 if (getVTSizeInBits() == 128)
1348 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1350 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1352 }
1353 if (Constraint[1] == 'f') {
1354 if (useSoftFloat())
1355 return std::make_pair(
1356 0u, static_cast<const TargetRegisterClass *>(nullptr));
1357 if (getVTSizeInBits() == 32)
1358 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1360 if (getVTSizeInBits() == 128)
1361 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1363 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1365 }
1366 if (Constraint[1] == 'v') {
1367 if (!Subtarget.hasVector())
1368 return std::make_pair(
1369 0u, static_cast<const TargetRegisterClass *>(nullptr));
1370 if (getVTSizeInBits() == 32)
1371 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1373 if (getVTSizeInBits() == 64)
1374 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1376 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1378 }
1379 }
1380 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1381}
1382
1383// FIXME? Maybe this could be a TableGen attribute on some registers and
1384// this table could be generated automatically from RegInfo.
1387 const MachineFunction &MF) const {
1388 Register Reg =
1390 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0)
1391 .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0)
1392 .Default(0);
1393
1394 if (Reg)
1395 return Reg;
1396 report_fatal_error("Invalid register name global variable");
1397}
1398
1400 const Constant *PersonalityFn) const {
1401 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1402}
1403
1405 const Constant *PersonalityFn) const {
1406 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1407}
1408
1410 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1411 SelectionDAG &DAG) const {
1412 // Only support length 1 constraints for now.
1413 if (Constraint.size() == 1) {
1414 switch (Constraint[0]) {
1415 case 'I': // Unsigned 8-bit constant
1416 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1417 if (isUInt<8>(C->getZExtValue()))
1418 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1419 Op.getValueType()));
1420 return;
1421
1422 case 'J': // Unsigned 12-bit constant
1423 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1424 if (isUInt<12>(C->getZExtValue()))
1425 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1426 Op.getValueType()));
1427 return;
1428
1429 case 'K': // Signed 16-bit constant
1430 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1431 if (isInt<16>(C->getSExtValue()))
1432 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1433 Op.getValueType()));
1434 return;
1435
1436 case 'L': // Signed 20-bit displacement (on all targets we support)
1437 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1438 if (isInt<20>(C->getSExtValue()))
1439 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1440 Op.getValueType()));
1441 return;
1442
1443 case 'M': // 0x7fffffff
1444 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1445 if (C->getZExtValue() == 0x7fffffff)
1446 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1447 Op.getValueType()));
1448 return;
1449 }
1450 }
1451 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1452}
1453
1454//===----------------------------------------------------------------------===//
1455// Calling conventions
1456//===----------------------------------------------------------------------===//
1457
1458#include "SystemZGenCallingConv.inc"
1459
1461 CallingConv::ID) const {
1462 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1463 SystemZ::R14D, 0 };
1464 return ScratchRegs;
1465}
1466
1468 Type *ToType) const {
1469 return isTruncateFree(FromType, ToType);
1470}
1471
1473 return CI->isTailCall();
1474}
1475
1476// Value is a value that has been passed to us in the location described by VA
1477// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1478// any loads onto Chain.
1480 CCValAssign &VA, SDValue Chain,
1481 SDValue Value) {
1482 // If the argument has been promoted from a smaller type, insert an
1483 // assertion to capture this.
1484 if (VA.getLocInfo() == CCValAssign::SExt)
1486 DAG.getValueType(VA.getValVT()));
1487 else if (VA.getLocInfo() == CCValAssign::ZExt)
1489 DAG.getValueType(VA.getValVT()));
1490
1491 if (VA.isExtInLoc())
1492 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1493 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1494 // If this is a short vector argument loaded from the stack,
1495 // extend from i64 to full vector size and then bitcast.
1496 assert(VA.getLocVT() == MVT::i64);
1497 assert(VA.getValVT().isVector());
1498 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1499 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1500 } else
1501 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1502 return Value;
1503}
1504
1505// Value is a value of type VA.getValVT() that we need to copy into
1506// the location described by VA. Return a copy of Value converted to
1507// VA.getValVT(). The caller is responsible for handling indirect values.
1509 CCValAssign &VA, SDValue Value) {
1510 switch (VA.getLocInfo()) {
1511 case CCValAssign::SExt:
1512 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1513 case CCValAssign::ZExt:
1514 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1515 case CCValAssign::AExt:
1516 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1517 case CCValAssign::BCvt: {
1518 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1519 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1520 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1521 // For an f32 vararg we need to first promote it to an f64 and then
1522 // bitcast it to an i64.
1523 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1524 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1525 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1526 ? MVT::v2i64
1527 : VA.getLocVT();
1528 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1529 // For ELF, this is a short vector argument to be stored to the stack,
1530 // bitcast to v2i64 and then extract first element.
1531 if (BitCastToType == MVT::v2i64)
1532 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1533 DAG.getConstant(0, DL, MVT::i32));
1534 return Value;
1535 }
1536 case CCValAssign::Full:
1537 return Value;
1538 default:
1539 llvm_unreachable("Unhandled getLocInfo()");
1540 }
1541}
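// Worked example (illustrative): an f32 vararg assigned LocVT == MVT::i64 is
// first FP_EXTENDed to f64 and then bitcast, so the value ends up in the
// 64-bit slot that the vararg convention expects.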
1542
1544 SDLoc DL(In);
1545 SDValue Lo, Hi;
1546 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1547 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1548 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1549 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1550 DAG.getConstant(64, DL, MVT::i32)));
1551 } else {
1552 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1553 }
1554
1555 // FIXME: If v2i64 were a legal type, we could use it instead of
1556 // Untyped here. This might enable improved folding.
1557 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1558 MVT::Untyped, Hi, Lo);
1559 return SDValue(Pair, 0);
1560}
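// Sketch of the effect (not from the source): the i128 value is split into
// Hi and Lo 64-bit halves and repacked through the PAIR128 pseudo, which the
// register allocator later materializes as an even/odd 64-bit GPR pair.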
1561
1563 SDLoc DL(In);
1564 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1565 DL, MVT::i64, In);
1566 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1567 DL, MVT::i64, In);
1568
1569 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1570 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1571 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1572 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1573 DAG.getConstant(64, DL, MVT::i32));
1574 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1575 } else {
1576 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1577 }
1578}
1579
1581 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1582 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1583 EVT ValueVT = Val.getValueType();
1584 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1585 // Inline assembly operand.
1586 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1587 return true;
1588 }
1589
1590 return false;
1591}
1592
1594 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1595 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1596 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1597 // Inline assembly operand.
1598 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1599 return DAG.getBitcast(ValueVT, Res);
1600 }
1601
1602 return SDValue();
1603}
1604
1606 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1607 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1608 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1610 MachineFrameInfo &MFI = MF.getFrameInfo();
1612 SystemZMachineFunctionInfo *FuncInfo =
1614 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1615 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1616
1617 // Assign locations to all of the incoming arguments.
1619 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1620 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1621 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
1622
1623 unsigned NumFixedGPRs = 0;
1624 unsigned NumFixedFPRs = 0;
1625 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1626 SDValue ArgValue;
1627 CCValAssign &VA = ArgLocs[I];
1628 EVT LocVT = VA.getLocVT();
1629 if (VA.isRegLoc()) {
1630 // Arguments passed in registers
1631 const TargetRegisterClass *RC;
1632 switch (LocVT.getSimpleVT().SimpleTy) {
1633 default:
1634 // Integers smaller than i64 should be promoted to i64.
1635 llvm_unreachable("Unexpected argument type");
1636 case MVT::i32:
1637 NumFixedGPRs += 1;
1638 RC = &SystemZ::GR32BitRegClass;
1639 break;
1640 case MVT::i64:
1641 NumFixedGPRs += 1;
1642 RC = &SystemZ::GR64BitRegClass;
1643 break;
1644 case MVT::f32:
1645 NumFixedFPRs += 1;
1646 RC = &SystemZ::FP32BitRegClass;
1647 break;
1648 case MVT::f64:
1649 NumFixedFPRs += 1;
1650 RC = &SystemZ::FP64BitRegClass;
1651 break;
1652 case MVT::f128:
1653 NumFixedFPRs += 2;
1654 RC = &SystemZ::FP128BitRegClass;
1655 break;
1656 case MVT::v16i8:
1657 case MVT::v8i16:
1658 case MVT::v4i32:
1659 case MVT::v2i64:
1660 case MVT::v4f32:
1661 case MVT::v2f64:
1662 RC = &SystemZ::VR128BitRegClass;
1663 break;
1664 }
1665
1666 Register VReg = MRI.createVirtualRegister(RC);
1667 MRI.addLiveIn(VA.getLocReg(), VReg);
1668 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1669 } else {
1670 assert(VA.isMemLoc() && "Argument not register or memory");
1671
1672 // Create the frame index object for this incoming parameter.
1673 // FIXME: Pre-include call frame size in the offset, should not
1674 // need to manually add it here.
1675 int64_t ArgSPOffset = VA.getLocMemOffset();
1676 if (Subtarget.isTargetXPLINK64()) {
1677 auto &XPRegs =
1679 ArgSPOffset += XPRegs.getCallFrameSize();
1680 }
1681 int FI =
1682 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1683
1684 // Create the SelectionDAG nodes corresponding to a load
1685 // from this parameter. Unpromoted ints and floats are
1686 // passed as right-justified 8-byte values.
1687 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1688 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1689 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1690 DAG.getIntPtrConstant(4, DL));
1691 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1693 }
1694
1695 // Convert the value of the argument register into the value that's
1696 // being passed.
1697 if (VA.getLocInfo() == CCValAssign::Indirect) {
1698 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1700 // If the original argument was split (e.g. i128), we need
1701 // to load all parts of it here (using the same address).
1702 unsigned ArgIndex = Ins[I].OrigArgIndex;
1703 assert (Ins[I].PartOffset == 0);
1704 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1705 CCValAssign &PartVA = ArgLocs[I + 1];
1706 unsigned PartOffset = Ins[I + 1].PartOffset;
1707 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1708 DAG.getIntPtrConstant(PartOffset, DL));
1709 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1711 ++I;
1712 }
1713 } else
1714 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1715 }
1716
1717 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
1718 // Save the number of non-varargs registers for later use by va_start, etc.
1719 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1720 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1721
1722 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1723 Subtarget.getSpecialRegisters());
1724
1725 // Likewise the address (in the form of a frame index) of where the
1726 // first stack vararg would be. The 1-byte size here is arbitrary.
1727 // FIXME: Pre-include call frame size in the offset, should not
1728 // need to manually add it here.
1729 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
1730 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
1731 FuncInfo->setVarArgsFrameIndex(FI);
1732 }
1733
1734 if (IsVarArg && Subtarget.isTargetELF()) {
1735 // Save the number of non-varargs registers for later use by va_start, etc.
1736 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1737 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1738
1739 // Likewise the address (in the form of a frame index) of where the
1740 // first stack vararg would be. The 1-byte size here is arbitrary.
1741 int64_t VarArgsOffset = CCInfo.getStackSize();
1742 FuncInfo->setVarArgsFrameIndex(
1743 MFI.CreateFixedObject(1, VarArgsOffset, true));
1744
1745 // ...and a similar frame index for the caller-allocated save area
1746 // that will be used to store the incoming registers.
1747 int64_t RegSaveOffset =
1748 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1749 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1750 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1751
1752 // Store the FPR varargs in the reserved frame slots. (We store the
1753 // GPRs as part of the prologue.)
1754 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1756 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1757 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1758 int FI =
1760 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1762 &SystemZ::FP64BitRegClass);
1763 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1764 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1766 }
1767 // Join the stores, which are independent of one another.
1768 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1769 ArrayRef(&MemOps[NumFixedFPRs],
1770 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
1771 }
1772 }
1773
1774 if (Subtarget.isTargetXPLINK64()) {
1775 // Create a virtual register for handling the incoming "ADA" special register (R5)
1776 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
1777 Register ADAvReg = MRI.createVirtualRegister(RC);
1778 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1779 Subtarget.getSpecialRegisters());
1780 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
1781 FuncInfo->setADAVirtualRegister(ADAvReg);
1782 }
1783 return Chain;
1784}
1785
1786static bool canUseSiblingCall(const CCState &ArgCCInfo,
1789 // Punt if there are any indirect or stack arguments, or if the call
1790 // needs the callee-saved argument register R6, or if the call uses
1791 // the callee-saved register arguments SwiftSelf and SwiftError.
1792 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1793 CCValAssign &VA = ArgLocs[I];
1795 return false;
1796 if (!VA.isRegLoc())
1797 return false;
1798 Register Reg = VA.getLocReg();
1799 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1800 return false;
1801 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1802 return false;
1803 }
1804 return true;
1805}
1806
1808 unsigned Offset, bool LoadAdr = false) {
1811 unsigned ADAvReg = MFI->getADAVirtualRegister();
1813
1814 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
1815 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
1816
1817 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
1818 if (!LoadAdr)
1819 Result = DAG.getLoad(
1820 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
1822
1823 return Result;
1824}
1825
1826// ADA access using a global value.
1827// Note: for functions, the address of the descriptor is returned.
1829 EVT PtrVT) {
1830 unsigned ADAtype;
1831 bool LoadAddr = false;
1832 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
1833 bool IsFunction =
1834 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
1835 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
1836
1837 if (IsFunction) {
1838 if (IsInternal) {
1840 LoadAddr = true;
1841 } else
1843 } else {
1845 }
1846 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
1847
1848 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
1849}
1850
1851static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
1852 SDLoc &DL, SDValue &Chain) {
1853 unsigned ADADelta = 0; // ADA offset in desc.
1854 unsigned EPADelta = 8; // EPA offset in desc.
1857
1858 // XPLink calling convention.
1859 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1860 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
1861 G->getGlobal()->hasPrivateLinkage());
1862 if (IsInternal) {
1865 unsigned ADAvReg = MFI->getADAVirtualRegister();
1866 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
1867 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1868 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1869 return true;
1870 } else {
1872 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1873 ADA = getADAEntry(DAG, GA, DL, ADADelta);
1874 Callee = getADAEntry(DAG, GA, DL, EPADelta);
1875 }
1876 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1878 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1879 ADA = getADAEntry(DAG, ES, DL, ADADelta);
1880 Callee = getADAEntry(DAG, ES, DL, EPADelta);
1881 } else {
1882 // Function pointer case
1883 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1884 DAG.getConstant(ADADelta, DL, PtrVT));
1885 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
1887 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1888 DAG.getConstant(EPADelta, DL, PtrVT));
1889 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
1891 }
1892 return false;
1893}
1894
1895SDValue
1897 SmallVectorImpl<SDValue> &InVals) const {
1898 SelectionDAG &DAG = CLI.DAG;
1899 SDLoc &DL = CLI.DL;
1901 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1903 SDValue Chain = CLI.Chain;
1904 SDValue Callee = CLI.Callee;
1905 bool &IsTailCall = CLI.IsTailCall;
1906 CallingConv::ID CallConv = CLI.CallConv;
1907 bool IsVarArg = CLI.IsVarArg;
1909 EVT PtrVT = getPointerTy(MF.getDataLayout());
1910 LLVMContext &Ctx = *DAG.getContext();
1912
1913 // FIXME: z/OS support to be added later.
1914 if (Subtarget.isTargetXPLINK64())
1915 IsTailCall = false;
1916
1917 // Analyze the operands of the call, assigning locations to each operand.
1919 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1920 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1921
1922 // We don't support GuaranteedTailCallOpt, only automatically-detected
1923 // sibling calls.
1924 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1925 IsTailCall = false;
1926
1927 // Get a count of how many bytes are to be pushed on the stack.
1928 unsigned NumBytes = ArgCCInfo.getStackSize();
1929
1930 // Mark the start of the call.
1931 if (!IsTailCall)
1932 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1933
1934 // Copy argument values to their designated locations.
1936 SmallVector<SDValue, 8> MemOpChains;
1937 SDValue StackPtr;
1938 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1939 CCValAssign &VA = ArgLocs[I];
1940 SDValue ArgValue = OutVals[I];
1941
1942 if (VA.getLocInfo() == CCValAssign::Indirect) {
1943 // Store the argument in a stack slot and pass its address.
1944 unsigned ArgIndex = Outs[I].OrigArgIndex;
1945 EVT SlotVT;
1946 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1947 // Allocate the full stack space for a promoted (and split) argument.
1948 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1949 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1950 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1951 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1952 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1953 } else {
1954 SlotVT = Outs[I].VT;
1955 }
1956 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1957 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1958 MemOpChains.push_back(
1959 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1961 // If the original argument was split (e.g. i128), we need
1962 // to store all parts of it here (and pass just one address).
1963 assert (Outs[I].PartOffset == 0);
1964 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1965 SDValue PartValue = OutVals[I + 1];
1966 unsigned PartOffset = Outs[I + 1].PartOffset;
1967 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1968 DAG.getIntPtrConstant(PartOffset, DL));
1969 MemOpChains.push_back(
1970 DAG.getStore(Chain, DL, PartValue, Address,
1972 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1973 SlotVT.getStoreSize()) && "Not enough space for argument part!");
1974 ++I;
1975 }
1976 ArgValue = SpillSlot;
1977 } else
1978 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1979
1980 if (VA.isRegLoc()) {
1981 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
1982 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
1983 // and low values.
1984 if (VA.getLocVT() == MVT::i128)
1985 ArgValue = lowerI128ToGR128(DAG, ArgValue);
1986 // Queue up the argument copies and emit them at the end.
1987 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1988 } else {
1989 assert(VA.isMemLoc() && "Argument not register or memory");
1990
1991 // Work out the address of the stack slot. Unpromoted ints and
1992 // floats are passed as right-justified 8-byte values.
1993 if (!StackPtr.getNode())
1994 StackPtr = DAG.getCopyFromReg(Chain, DL,
1995 Regs->getStackPointerRegister(), PtrVT);
1996 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
1997 VA.getLocMemOffset();
1998 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1999 Offset += 4;
2000 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2002
2003 // Emit the store.
2004 MemOpChains.push_back(
2005 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2006
2007 // Although long doubles or vectors are passed through the stack when
2008 // they are vararg (non-fixed arguments), if a long double or vector
2009 // occupies the third and fourth slot of the argument list GPR3 should
2010 // still shadow the third slot of the argument list.
2011 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2012 SDValue ShadowArgValue =
2013 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2014 DAG.getIntPtrConstant(1, DL));
2015 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2016 }
2017 }
2018 }
2019
2020 // Join the stores, which are independent of one another.
2021 if (!MemOpChains.empty())
2022 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2023
2024 // Accept direct calls by converting symbolic call addresses to the
2025 // associated Target* opcodes. Force %r1 to be used for indirect
2026 // tail calls.
2027 SDValue Glue;
2028
2029 if (Subtarget.isTargetXPLINK64()) {
2030 SDValue ADA;
2031 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2032 if (!IsBRASL) {
2033 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2034 ->getAddressOfCalleeRegister();
2035 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2036 Glue = Chain.getValue(1);
2037 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2038 }
2039 RegsToPass.push_back(std::make_pair(
2040 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2041 } else {
2042 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2043 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2044 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2045 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2046 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2047 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2048 } else if (IsTailCall) {
2049 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2050 Glue = Chain.getValue(1);
2051 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2052 }
2053 }
2054
2055 // Build a sequence of copy-to-reg nodes, chained and glued together.
2056 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
2057 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
2058 RegsToPass[I].second, Glue);
2059 Glue = Chain.getValue(1);
2060 }
2061
2062 // The first call operand is the chain and the second is the target address.
2063 SmallVector<SDValue, 8> Ops;
2064 Ops.push_back(Chain);
2065 Ops.push_back(Callee);
2066
2067 // Add argument registers to the end of the list so that they are
2068 // known live into the call.
2069 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
2070 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
2071 RegsToPass[I].second.getValueType()));
2072
2073 // Add a register mask operand representing the call-preserved registers.
2074 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2075 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2076 assert(Mask && "Missing call preserved mask for calling convention");
2077 Ops.push_back(DAG.getRegisterMask(Mask));
2078
2079 // Glue the call to the argument copies, if any.
2080 if (Glue.getNode())
2081 Ops.push_back(Glue);
2082
2083 // Emit the call.
2084 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2085 if (IsTailCall) {
2086 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2087 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2088 return Ret;
2089 }
2090 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2091 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2092 Glue = Chain.getValue(1);
2093
2094 // Mark the end of the call, which is glued to the call itself.
2095 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2096 Glue = Chain.getValue(1);
2097
2098 // Assign locations to each value returned by this call.
2099 SmallVector<CCValAssign, 16> RetLocs;
2100 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2101 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2102
2103 // Copy all of the result registers out of their specified physreg.
2104 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2105 CCValAssign &VA = RetLocs[I];
2106
2107 // Copy the value out, gluing the copy to the end of the call sequence.
2108 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2109 VA.getLocVT(), Glue);
2110 Chain = RetValue.getValue(1);
2111 Glue = RetValue.getValue(2);
2112
2113 // Convert the value of the return register into the value that's
2114 // being returned.
2115 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2116 }
2117
2118 return Chain;
2119}
2120
2121// Generate a call taking the given operands as arguments and returning a
2122// result of type RetVT.
2123 std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
2124 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2125 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2126 bool DoesNotReturn, bool IsReturnValueUsed) const {
2127 TargetLowering::ArgListTy Args;
2128 Args.reserve(Ops.size());
2130 TargetLowering::ArgListEntry Entry;
2129
2131 for (SDValue Op : Ops) {
2132 Entry.Node = Op;
2133 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
2134 Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2135 Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2136 Args.push_back(Entry);
2137 }
2138
2139 SDValue Callee =
2140 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2141
2142 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2143 TargetLowering::CallLoweringInfo CLI(DAG);
2144 bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
2145 CLI.setDebugLoc(DL)
2146 .setChain(Chain)
2147 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2148 .setNoReturn(DoesNotReturn)
2149 .setDiscardResult(!IsReturnValueUsed)
2150 .setSExtResult(SignExtend)
2151 .setZExtResult(!SignExtend);
2152 return LowerCallTo(CLI);
2153}
2154
2155 bool SystemZTargetLowering::
2156 CanLowerReturn(CallingConv::ID CallConv,
2157 MachineFunction &MF, bool isVarArg,
2158 const SmallVectorImpl<ISD::OutputArg> &Outs,
2159 LLVMContext &Context) const {
2160 // Special case that we cannot easily detect in RetCC_SystemZ since
2161 // i128 may not be a legal type.
2162 for (auto &Out : Outs)
2163 if (Out.ArgVT == MVT::i128)
2164 return false;
2165
2166 SmallVector<CCValAssign, 16> RetLocs;
2167 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
2168 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2169}
2170
2171SDValue
2172 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2173 bool IsVarArg,
2174 const SmallVectorImpl<ISD::OutputArg> &Outs,
2175 const SmallVectorImpl<SDValue> &OutVals,
2176 const SDLoc &DL, SelectionDAG &DAG) const {
2177 MachineFunction &MF = DAG.getMachineFunction();
2178
2179 // Assign locations to each returned value.
2180 SmallVector<CCValAssign, 16> RetLocs;
2181 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2182 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2183
2184 // Quick exit for void returns
2185 if (RetLocs.empty())
2186 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2187
2188 if (CallConv == CallingConv::GHC)
2189 report_fatal_error("GHC functions return void only");
2190
2191 // Copy the result values into the output registers.
2192 SDValue Glue;
2193 SmallVector<SDValue, 4> RetOps;
2194 RetOps.push_back(Chain);
2195 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2196 CCValAssign &VA = RetLocs[I];
2197 SDValue RetValue = OutVals[I];
2198
2199 // Make the return register live on exit.
2200 assert(VA.isRegLoc() && "Can only return in registers!");
2201
2202 // Promote the value as required.
2203 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2204
2205 // Chain and glue the copies together.
2206 Register Reg = VA.getLocReg();
2207 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2208 Glue = Chain.getValue(1);
2209 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2210 }
2211
2212 // Update chain and glue.
2213 RetOps[0] = Chain;
2214 if (Glue.getNode())
2215 RetOps.push_back(Glue);
2216
2217 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2218}
2219
2220// Return true if Op is an intrinsic node with chain that returns the CC value
2221// as its only (other) argument. Provide the associated SystemZISD opcode and
2222// the mask of valid CC values if so.
2223static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2224 unsigned &CCValid) {
2225 unsigned Id = Op.getConstantOperandVal(1);
2226 switch (Id) {
2227 case Intrinsic::s390_tbegin:
2228 Opcode = SystemZISD::TBEGIN;
2229 CCValid = SystemZ::CCMASK_TBEGIN;
2230 return true;
2231
2232 case Intrinsic::s390_tbegin_nofloat:
2233 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2234 CCValid = SystemZ::CCMASK_TBEGIN;
2235 return true;
2236
2237 case Intrinsic::s390_tend:
2238 Opcode = SystemZISD::TEND;
2239 CCValid = SystemZ::CCMASK_TEND;
2240 return true;
2241
2242 default:
2243 return false;
2244 }
2245}
2246
2247// Return true if Op is an intrinsic node without chain that returns the
2248// CC value as its final argument. Provide the associated SystemZISD
2249// opcode and the mask of valid CC values if so.
2250static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2251 unsigned Id = Op.getConstantOperandVal(0);
2252 switch (Id) {
2253 case Intrinsic::s390_vpkshs:
2254 case Intrinsic::s390_vpksfs:
2255 case Intrinsic::s390_vpksgs:
2256 Opcode = SystemZISD::PACKS_CC;
2257 CCValid = SystemZ::CCMASK_VCMP;
2258 return true;
2259
2260 case Intrinsic::s390_vpklshs:
2261 case Intrinsic::s390_vpklsfs:
2262 case Intrinsic::s390_vpklsgs:
2263 Opcode = SystemZISD::PACKLS_CC;
2264 CCValid = SystemZ::CCMASK_VCMP;
2265 return true;
2266
2267 case Intrinsic::s390_vceqbs:
2268 case Intrinsic::s390_vceqhs:
2269 case Intrinsic::s390_vceqfs:
2270 case Intrinsic::s390_vceqgs:
2271 Opcode = SystemZISD::VICMPES;
2272 CCValid = SystemZ::CCMASK_VCMP;
2273 return true;
2274
2275 case Intrinsic::s390_vchbs:
2276 case Intrinsic::s390_vchhs:
2277 case Intrinsic::s390_vchfs:
2278 case Intrinsic::s390_vchgs:
2279 Opcode = SystemZISD::VICMPHS;
2280 CCValid = SystemZ::CCMASK_VCMP;
2281 return true;
2282
2283 case Intrinsic::s390_vchlbs:
2284 case Intrinsic::s390_vchlhs:
2285 case Intrinsic::s390_vchlfs:
2286 case Intrinsic::s390_vchlgs:
2287 Opcode = SystemZISD::VICMPHLS;
2288 CCValid = SystemZ::CCMASK_VCMP;
2289 return true;
2290
2291 case Intrinsic::s390_vtm:
2292 Opcode = SystemZISD::VTM;
2293 CCValid = SystemZ::CCMASK_VCMP;
2294 return true;
2295
2296 case Intrinsic::s390_vfaebs:
2297 case Intrinsic::s390_vfaehs:
2298 case Intrinsic::s390_vfaefs:
2299 Opcode = SystemZISD::VFAE_CC;
2300 CCValid = SystemZ::CCMASK_ANY;
2301 return true;
2302
2303 case Intrinsic::s390_vfaezbs:
2304 case Intrinsic::s390_vfaezhs:
2305 case Intrinsic::s390_vfaezfs:
2306 Opcode = SystemZISD::VFAEZ_CC;
2307 CCValid = SystemZ::CCMASK_ANY;
2308 return true;
2309
2310 case Intrinsic::s390_vfeebs:
2311 case Intrinsic::s390_vfeehs:
2312 case Intrinsic::s390_vfeefs:
2313 Opcode = SystemZISD::VFEE_CC;
2314 CCValid = SystemZ::CCMASK_ANY;
2315 return true;
2316
2317 case Intrinsic::s390_vfeezbs:
2318 case Intrinsic::s390_vfeezhs:
2319 case Intrinsic::s390_vfeezfs:
2320 Opcode = SystemZISD::VFEEZ_CC;
2321 CCValid = SystemZ::CCMASK_ANY;
2322 return true;
2323
2324 case Intrinsic::s390_vfenebs:
2325 case Intrinsic::s390_vfenehs:
2326 case Intrinsic::s390_vfenefs:
2327 Opcode = SystemZISD::VFENE_CC;
2328 CCValid = SystemZ::CCMASK_ANY;
2329 return true;
2330
2331 case Intrinsic::s390_vfenezbs:
2332 case Intrinsic::s390_vfenezhs:
2333 case Intrinsic::s390_vfenezfs:
2334 Opcode = SystemZISD::VFENEZ_CC;
2335 CCValid = SystemZ::CCMASK_ANY;
2336 return true;
2337
2338 case Intrinsic::s390_vistrbs:
2339 case Intrinsic::s390_vistrhs:
2340 case Intrinsic::s390_vistrfs:
2341 Opcode = SystemZISD::VISTR_CC;
2342 CCValid = SystemZ::CCMASK_ANY;
2343 return true;
2344
2345 case Intrinsic::s390_vstrcbs:
2346 case Intrinsic::s390_vstrchs:
2347 case Intrinsic::s390_vstrcfs:
2348 Opcode = SystemZISD::VSTRC_CC;
2349 CCValid = SystemZ::CCMASK_ANY;
2350 return true;
2351
2352 case Intrinsic::s390_vstrczbs:
2353 case Intrinsic::s390_vstrczhs:
2354 case Intrinsic::s390_vstrczfs:
2355 Opcode = SystemZISD::VSTRCZ_CC;
2356 CCValid = SystemZ::CCMASK_ANY;
2357 return true;
2358
2359 case Intrinsic::s390_vstrsb:
2360 case Intrinsic::s390_vstrsh:
2361 case Intrinsic::s390_vstrsf:
2362 Opcode = SystemZISD::VSTRS_CC;
2363 CCValid = SystemZ::CCMASK_ANY;
2364 return true;
2365
2366 case Intrinsic::s390_vstrszb:
2367 case Intrinsic::s390_vstrszh:
2368 case Intrinsic::s390_vstrszf:
2369 Opcode = SystemZISD::VSTRSZ_CC;
2370 CCValid = SystemZ::CCMASK_ANY;
2371 return true;
2372
2373 case Intrinsic::s390_vfcedbs:
2374 case Intrinsic::s390_vfcesbs:
2375 Opcode = SystemZISD::VFCMPES;
2376 CCValid = SystemZ::CCMASK_VCMP;
2377 return true;
2378
2379 case Intrinsic::s390_vfchdbs:
2380 case Intrinsic::s390_vfchsbs:
2381 Opcode = SystemZISD::VFCMPHS;
2382 CCValid = SystemZ::CCMASK_VCMP;
2383 return true;
2384
2385 case Intrinsic::s390_vfchedbs:
2386 case Intrinsic::s390_vfchesbs:
2387 Opcode = SystemZISD::VFCMPHES;
2388 CCValid = SystemZ::CCMASK_VCMP;
2389 return true;
2390
2391 case Intrinsic::s390_vftcidb:
2392 case Intrinsic::s390_vftcisb:
2393 Opcode = SystemZISD::VFTCI;
2394 CCValid = SystemZ::CCMASK_VCMP;
2395 return true;
2396
2397 case Intrinsic::s390_tdc:
2398 Opcode = SystemZISD::TDC;
2399 CCValid = SystemZ::CCMASK_TDC;
2400 return true;
2401
2402 default:
2403 return false;
2404 }
2405}
2406
2407// Emit an intrinsic with chain and an explicit CC register result.
2408 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2409 unsigned Opcode) {
2410 // Copy all operands except the intrinsic ID.
2411 unsigned NumOps = Op.getNumOperands();
2412 SmallVector<SDValue, 6> Ops;
2413 Ops.reserve(NumOps - 1);
2414 Ops.push_back(Op.getOperand(0));
2415 for (unsigned I = 2; I < NumOps; ++I)
2416 Ops.push_back(Op.getOperand(I));
2417
2418 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2419 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2420 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2421 SDValue OldChain = SDValue(Op.getNode(), 1);
2422 SDValue NewChain = SDValue(Intr.getNode(), 1);
2423 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2424 return Intr.getNode();
2425}
2426
2427// Emit an intrinsic with an explicit CC register result.
2428 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2429 unsigned Opcode) {
2430 // Copy all operands except the intrinsic ID.
2431 unsigned NumOps = Op.getNumOperands();
2432 SmallVector<SDValue, 6> Ops;
2433 Ops.reserve(NumOps - 1);
2434 for (unsigned I = 1; I < NumOps; ++I)
2435 Ops.push_back(Op.getOperand(I));
2436
2437 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2438 return Intr.getNode();
2439}
2440
2441// CC is a comparison that will be implemented using an integer or
2442// floating-point comparison. Return the condition code mask for
2443// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2444// unsigned comparisons and clear for signed ones. In the floating-point
2445// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2446 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2447#define CONV(X) \
2448 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2449 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2450 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2451
2452 switch (CC) {
2453 default:
2454 llvm_unreachable("Invalid integer condition!");
2455
2456 CONV(EQ);
2457 CONV(NE);
2458 CONV(GT);
2459 CONV(GE);
2460 CONV(LT);
2461 CONV(LE);
2462
2463 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2464 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2465 }
2466#undef CONV
2467}
2468
2469// If C can be converted to a comparison against zero, adjust the operands
2470// as necessary.
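// (For example, a signed test x > -1 is rewritten here as x >= 0.)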
2471static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2472 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2473 return;
2474
2475 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2476 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2477 return;
2478
2479 int64_t Value = ConstOp1->getSExtValue();
2480 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2481 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2482 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2483 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2484 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2485 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2486 }
2487}
2488
2489// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2490// adjust the operands as necessary.
2491static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2492 Comparison &C) {
2493 // For us to make any changes, it must be a comparison between a single-use
2494 // load and a constant.
2495 if (!C.Op0.hasOneUse() ||
2496 C.Op0.getOpcode() != ISD::LOAD ||
2497 C.Op1.getOpcode() != ISD::Constant)
2498 return;
2499
2500 // We must have an 8- or 16-bit load.
2501 auto *Load = cast<LoadSDNode>(C.Op0);
2502 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2503 if ((NumBits != 8 && NumBits != 16) ||
2504 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2505 return;
2506
2507 // The load must be an extending one and the constant must be within the
2508 // range of the unextended value.
2509 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2510 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2511 return;
2512 uint64_t Value = ConstOp1->getZExtValue();
2513 uint64_t Mask = (1 << NumBits) - 1;
2514 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2515 // Make sure that ConstOp1 is in range of C.Op0.
2516 int64_t SignedValue = ConstOp1->getSExtValue();
2517 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2518 return;
2519 if (C.ICmpType != SystemZICMP::SignedOnly) {
2520 // Unsigned comparison between two sign-extended values is equivalent
2521 // to unsigned comparison between two zero-extended values.
2522 Value &= Mask;
2523 } else if (NumBits == 8) {
2524 // Try to treat the comparison as unsigned, so that we can use CLI.
2525 // Adjust CCMask and Value as necessary.
2526 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2527 // Test whether the high bit of the byte is set.
2528 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2529 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2530 // Test whether the high bit of the byte is clear.
2531 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2532 else
2533 // No instruction exists for this combination.
2534 return;
2535 C.ICmpType = SystemZICMP::UnsignedOnly;
2536 }
2537 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2538 if (Value > Mask)
2539 return;
2540 // If the constant is in range, we can use any comparison.
2541 C.ICmpType = SystemZICMP::Any;
2542 } else
2543 return;
2544
2545 // Make sure that the first operand is an i32 of the right extension type.
2546 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2547 ISD::SEXTLOAD :
2548 ISD::ZEXTLOAD);
2549 if (C.Op0.getValueType() != MVT::i32 ||
2550 Load->getExtensionType() != ExtType) {
2551 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2552 Load->getBasePtr(), Load->getPointerInfo(),
2553 Load->getMemoryVT(), Load->getAlign(),
2554 Load->getMemOperand()->getFlags());
2555 // Update the chain uses.
2556 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2557 }
2558
2559 // Make sure that the second operand is an i32 with the right value.
2560 if (C.Op1.getValueType() != MVT::i32 ||
2561 Value != ConstOp1->getZExtValue())
2562 C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2563}
2564
2565// Return true if Op is either an unextended load, or a load suitable
2566// for integer register-memory comparisons of type ICmpType.
2567static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2568 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2569 if (Load) {
2570 // There are no instructions to compare a register with a memory byte.
2571 if (Load->getMemoryVT() == MVT::i8)
2572 return false;
2573 // Otherwise decide on extension type.
2574 switch (Load->getExtensionType()) {
2575 case ISD::NON_EXTLOAD:
2576 return true;
2577 case ISD::SEXTLOAD:
2578 return ICmpType != SystemZICMP::UnsignedOnly;
2579 case ISD::ZEXTLOAD:
2580 return ICmpType != SystemZICMP::SignedOnly;
2581 default:
2582 break;
2583 }
2584 }
2585 return false;
2586}
2587
2588// Return true if it is better to swap the operands of C.
2589static bool shouldSwapCmpOperands(const Comparison &C) {
2590 // Leave i128 and f128 comparisons alone, since they have no memory forms.
2591 if (C.Op0.getValueType() == MVT::i128)
2592 return false;
2593 if (C.Op0.getValueType() == MVT::f128)
2594 return false;
2595
2596 // Always keep a floating-point constant second, since comparisons with
2597 // zero can use LOAD TEST and comparisons with other constants make a
2598 // natural memory operand.
2599 if (isa<ConstantFPSDNode>(C.Op1))
2600 return false;
2601
2602 // Never swap comparisons with zero since there are many ways to optimize
2603 // those later.
2604 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2605 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2606 return false;
2607
2608 // Also keep natural memory operands second if the loaded value is
2609 // only used here. Several comparisons have memory forms.
2610 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2611 return false;
2612
2613 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2614 // In that case we generally prefer the memory to be second.
2615 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2616 // The only exceptions are when the second operand is a constant and
2617 // we can use things like CHHSI.
2618 if (!ConstOp1)
2619 return true;
2620 // The unsigned memory-immediate instructions can handle 16-bit
2621 // unsigned integers.
2622 if (C.ICmpType != SystemZICMP::SignedOnly &&
2623 isUInt<16>(ConstOp1->getZExtValue()))
2624 return false;
2625 // The signed memory-immediate instructions can handle 16-bit
2626 // signed integers.
2627 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2628 isInt<16>(ConstOp1->getSExtValue()))
2629 return false;
2630 return true;
2631 }
2632
2633 // Try to promote the use of CGFR and CLGFR.
2634 unsigned Opcode0 = C.Op0.getOpcode();
2635 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2636 return true;
2637 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2638 return true;
2639 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
2640 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2641 C.Op0.getConstantOperandVal(1) == 0xffffffff)
2642 return true;
2643
2644 return false;
2645}
2646
2647// Check whether C tests for equality between X and Y and whether X - Y
2648// or Y - X is also computed. In that case it's better to compare the
2649// result of the subtraction against zero.
2650 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2651 Comparison &C) {
2652 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2653 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2654 for (SDNode *N : C.Op0->uses()) {
2655 if (N->getOpcode() == ISD::SUB &&
2656 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2657 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2658 // Disable the nsw and nuw flags: the backend needs to handle
2659 // overflow as well during comparison elimination.
2660 SDNodeFlags Flags = N->getFlags();
2661 Flags.setNoSignedWrap(false);
2662 Flags.setNoUnsignedWrap(false);
2663 N->setFlags(Flags);
2664 C.Op0 = SDValue(N, 0);
2665 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2666 return;
2667 }
2668 }
2669 }
2670}
2671
2672// Check whether C compares a floating-point value with zero and if that
2673// floating-point value is also negated. In this case we can use the
2674// negation to set CC, thus avoiding separate LOAD AND TEST and
2675// LOAD (NEGATIVE/COMPLEMENT) instructions.
2676static void adjustForFNeg(Comparison &C) {
2677 // This optimization is invalid for strict comparisons, since FNEG
2678 // does not raise any exceptions.
2679 if (C.Chain)
2680 return;
2681 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2682 if (C1 && C1->isZero()) {
2683 for (SDNode *N : C.Op0->uses()) {
2684 if (N->getOpcode() == ISD::FNEG) {
2685 C.Op0 = SDValue(N, 0);
2686 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2687 return;
2688 }
2689 }
2690 }
2691}
2692
2693// Check whether C compares (shl X, 32) with 0 and whether X is
2694// also sign-extended. In that case it is better to test the result
2695// of the sign extension using LTGFR.
2696//
2697// This case is important because InstCombine transforms a comparison
2698// with (sext (trunc X)) into a comparison with (shl X, 32).
2699static void adjustForLTGFR(Comparison &C) {
2700 // Check for a comparison between (shl X, 32) and 0.
2701 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
2702 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
2703 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2704 if (C1 && C1->getZExtValue() == 32) {
2705 SDValue ShlOp0 = C.Op0.getOperand(0);
2706 // See whether X has any SIGN_EXTEND_INREG uses.
2707 for (SDNode *N : ShlOp0->uses()) {
2708 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2709 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2710 C.Op0 = SDValue(N, 0);
2711 return;
2712 }
2713 }
2714 }
2715 }
2716}
2717
2718// If C compares the truncation of an extending load, try to compare
2719// the untruncated value instead. This exposes more opportunities to
2720// reuse CC.
2721static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2722 Comparison &C) {
2723 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2724 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2725 C.Op1.getOpcode() == ISD::Constant &&
2726 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
2727 C.Op1->getAsZExtVal() == 0) {
2728 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2729 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
2730 C.Op0.getValueSizeInBits().getFixedValue()) {
2731 unsigned Type = L->getExtensionType();
2732 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2733 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2734 C.Op0 = C.Op0.getOperand(0);
2735 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2736 }
2737 }
2738 }
2739}
2740
2741// Return true if shift operation N has an in-range constant shift value.
2742// Store it in ShiftVal if so.
2743static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2744 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2745 if (!Shift)
2746 return false;
2747
2748 uint64_t Amount = Shift->getZExtValue();
2749 if (Amount >= N.getValueSizeInBits())
2750 return false;
2751
2752 ShiftVal = Amount;
2753 return true;
2754}
2755
2756// Check whether an AND with Mask is suitable for a TEST UNDER MASK
2757// instruction and whether the CC value is descriptive enough to handle
2758// a comparison of type Opcode between the AND result and CmpVal.
2759// CCMask says which comparison result is being tested and BitSize is
2760// the number of bits in the operands. If TEST UNDER MASK can be used,
2761// return the corresponding CC mask, otherwise return 0.
2762static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2763 uint64_t Mask, uint64_t CmpVal,
2764 unsigned ICmpType) {
2765 assert(Mask != 0 && "ANDs with zero should have been removed by now");
2766
2767 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2768 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2769 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2770 return 0;
2771
2772 // Work out the masks for the lowest and highest bits.
2773 uint64_t High = uint64_t(1) << (63 - llvm::countl_zero(Mask));
2774 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
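// (E.g. a mask of 0x0ff0 gives High == 0x0800 and Low == 0x0010.)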
2775
2776 // Signed ordered comparisons are effectively unsigned if the sign
2777 // bit is dropped.
2778 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2779
2780 // Check for equality comparisons with 0, or the equivalent.
2781 if (CmpVal == 0) {
2782 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2783 return SystemZ::CCMASK_TM_ALL_0;
2784 if (CCMask == SystemZ::CCMASK_CMP_NE)
2785 return SystemZ::CCMASK_TM_SOME_1;
2786 }
2787 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2788 if (CCMask == SystemZ::CCMASK_CMP_LT)
2789 return SystemZ::CCMASK_TM_ALL_0;
2790 if (CCMask == SystemZ::CCMASK_CMP_GE)
2791 return SystemZ::CCMASK_TM_SOME_1;
2792 }
2793 if (EffectivelyUnsigned && CmpVal < Low) {
2794 if (CCMask == SystemZ::CCMASK_CMP_LE)
2795 return SystemZ::CCMASK_TM_ALL_0;
2796 if (CCMask == SystemZ::CCMASK_CMP_GT)
2797 return SystemZ::CCMASK_TM_SOME_1;
2798 }
2799
2800 // Check for equality comparisons with the mask, or the equivalent.
2801 if (CmpVal == Mask) {
2802 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2803 return SystemZ::CCMASK_TM_ALL_1;
2804 if (CCMask == SystemZ::CCMASK_CMP_NE)
2805 return SystemZ::CCMASK_TM_SOME_0;
2806 }
2807 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2808 if (CCMask == SystemZ::CCMASK_CMP_GT)
2809 return SystemZ::CCMASK_TM_ALL_1;
2810 if (CCMask == SystemZ::CCMASK_CMP_LE)
2811 return SystemZ::CCMASK_TM_SOME_0;
2812 }
2813 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2814 if (CCMask == SystemZ::CCMASK_CMP_GE)
2815 return SystemZ::CCMASK_TM_ALL_1;
2816 if (CCMask == SystemZ::CCMASK_CMP_LT)
2817 return SystemZ::CCMASK_TM_SOME_0;
2818 }
2819
2820 // Check for ordered comparisons with the top bit.
2821 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2822 if (CCMask == SystemZ::CCMASK_CMP_LE)
2823 return SystemZ::CCMASK_TM_MSB_0;
2824 if (CCMask == SystemZ::CCMASK_CMP_GT)
2825 return SystemZ::CCMASK_TM_MSB_1;
2826 }
2827 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2828 if (CCMask == SystemZ::CCMASK_CMP_LT)
2829 return SystemZ::CCMASK_TM_MSB_0;
2830 if (CCMask == SystemZ::CCMASK_CMP_GE)
2831 return SystemZ::CCMASK_TM_MSB_1;
2832 }
2833
2834 // If there are just two bits, we can do equality checks for Low and High
2835 // as well.
2836 if (Mask == Low + High) {
2837 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2838 return SystemZ::CCMASK_TM_MIXED_MSB_0;
2839 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2840 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2841 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2842 return SystemZ::CCMASK_TM_MIXED_MSB_1;
2843 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2844 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2845 }
2846
2847 // Looks like we've exhausted our options.
2848 return 0;
2849}
2850
2851// See whether C can be implemented as a TEST UNDER MASK instruction.
2852// Update the arguments with the TM version if so.
2853 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2854 Comparison &C) {
2855 // Use VECTOR TEST UNDER MASK for i128 operations.
2856 if (C.Op0.getValueType() == MVT::i128) {
2857 // We can use VTM for EQ/NE comparisons of x & y against 0.
2858 if (C.Op0.getOpcode() == ISD::AND &&
2859 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2860 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
2861 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
2862 if (Mask && Mask->getAPIntValue() == 0) {
2863 C.Opcode = SystemZISD::VTM;
2864 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
2865 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
2866 C.CCValid = SystemZ::CCMASK_VCMP;
2867 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2868 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2869 else
2870 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2871 }
2872 }
2873 return;
2874 }
2875
2876 // Check that we have a comparison with a constant.
2877 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2878 if (!ConstOp1)
2879 return;
2880 uint64_t CmpVal = ConstOp1->getZExtValue();
2881
2882 // Check whether the nonconstant input is an AND with a constant mask.
2883 Comparison NewC(C);
2884 uint64_t MaskVal;
2885 ConstantSDNode *Mask = nullptr;
2886 if (C.Op0.getOpcode() == ISD::AND) {
2887 NewC.Op0 = C.Op0.getOperand(0);
2888 NewC.Op1 = C.Op0.getOperand(1);
2889 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2890 if (!Mask)
2891 return;
2892 MaskVal = Mask->getZExtValue();
2893 } else {
2894 // There is no instruction to compare with a 64-bit immediate
2895 // so use TMHH instead if possible. We need an unsigned ordered
2896 // comparison with an i64 immediate.
2897 if (NewC.Op0.getValueType() != MVT::i64 ||
2898 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2899 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2900 NewC.ICmpType == SystemZICMP::SignedOnly)
2901 return;
2902 // Convert LE and GT comparisons into LT and GE.
2903 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2904 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2905 if (CmpVal == uint64_t(-1))
2906 return;
2907 CmpVal += 1;
2908 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2909 }
2910 // If the low N bits of Op1 are zero, then the low N bits of Op0 can
2911 // be masked off without changing the result.
2912 MaskVal = -(CmpVal & -CmpVal);
2913 NewC.ICmpType = SystemZICMP::UnsignedOnly;
2914 }
2915 if (!MaskVal)
2916 return;
2917
2918 // Check whether the combination of mask, comparison value and comparison
2919 // type are suitable.
2920 unsigned BitSize = NewC.Op0.getValueSizeInBits();
2921 unsigned NewCCMask, ShiftVal;
2922 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2923 NewC.Op0.getOpcode() == ISD::SHL &&
2924 isSimpleShift(NewC.Op0, ShiftVal) &&
2925 (MaskVal >> ShiftVal != 0) &&
2926 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2927 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2928 MaskVal >> ShiftVal,
2929 CmpVal >> ShiftVal,
2930 SystemZICMP::Any))) {
2931 NewC.Op0 = NewC.Op0.getOperand(0);
2932 MaskVal >>= ShiftVal;
2933 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2934 NewC.Op0.getOpcode() == ISD::SRL &&
2935 isSimpleShift(NewC.Op0, ShiftVal) &&
2936 (MaskVal << ShiftVal != 0) &&
2937 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2938 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2939 MaskVal << ShiftVal,
2940 CmpVal << ShiftVal,
2941 SystemZICMP::UnsignedOnly))) {
2942 NewC.Op0 = NewC.Op0.getOperand(0);
2943 MaskVal <<= ShiftVal;
2944 } else {
2945 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2946 NewC.ICmpType);
2947 if (!NewCCMask)
2948 return;
2949 }
2950
2951 // Go ahead and make the change.
2952 C.Opcode = SystemZISD::TM;
2953 C.Op0 = NewC.Op0;
2954 if (Mask && Mask->getZExtValue() == MaskVal)
2955 C.Op1 = SDValue(Mask, 0);
2956 else
2957 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2958 C.CCValid = SystemZ::CCMASK_TM;
2959 C.CCMask = NewCCMask;
2960}
2961
2962// Implement i128 comparison in vector registers.
2963static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
2964 Comparison &C) {
2965 if (C.Opcode != SystemZISD::ICMP)
2966 return;
2967 if (C.Op0.getValueType() != MVT::i128)
2968 return;
2969
2970 // (In-)Equality comparisons can be implemented via VCEQGS.
2971 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2972 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2973 C.Opcode = SystemZISD::VICMPES;
2974 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
2975 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
2976 C.CCValid = SystemZ::CCMASK_VCMP;
2977 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2978 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2979 else
2980 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2981 return;
2982 }
2983
2984 // Normalize other comparisons to GT.
2985 bool Swap = false, Invert = false;
2986 switch (C.CCMask) {
2987 case SystemZ::CCMASK_CMP_GT: break;
2988 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
2989 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
2990 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
2991 default: llvm_unreachable("Invalid integer condition!");
2992 }
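// (E.g. a signed x < y is handled as y > x via Swap, and x <= y as the inverse of x > y via Invert.)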
2993 if (Swap)
2994 std::swap(C.Op0, C.Op1);
2995
2996 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2997 C.Opcode = SystemZISD::UCMP128HI;
2998 else
2999 C.Opcode = SystemZISD::SCMP128HI;
3000 C.CCValid = SystemZ::CCMASK_ANY;
3001 C.CCMask = SystemZ::CCMASK_1;
3002
3003 if (Invert)
3004 C.CCMask ^= C.CCValid;
3005}
3006
3007// See whether the comparison argument contains a redundant AND
3008// and remove it if so. This sometimes happens due to the generic
3009// BRCOND expansion.
3010 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
3011 Comparison &C) {
3012 if (C.Op0.getOpcode() != ISD::AND)
3013 return;
3014 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3015 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3016 return;
3017 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3018 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3019 return;
3020
3021 C.Op0 = C.Op0.getOperand(0);
3022}
3023
3024// Return a Comparison that tests the condition-code result of intrinsic
3025// node Call against constant integer CC using comparison code Cond.
3026// Opcode is the opcode of the SystemZISD operation for the intrinsic
3027// and CCValid is the set of possible condition-code results.
3028static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3029 SDValue Call, unsigned CCValid, uint64_t CC,
3030 ISD::CondCode Cond) {
3031 Comparison C(Call, SDValue(), SDValue());
3032 C.Opcode = Opcode;
3033 C.CCValid = CCValid;
3034 if (Cond == ISD::SETEQ)
3035 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3036 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3037 else if (Cond == ISD::SETNE)
3038 // ...and the inverse of that.
3039 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3040 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3041 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3042 // always true for CC>3.
3043 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3044 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3045 // ...and the inverse of that.
3046 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3047 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3048 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3049 // always true for CC>3.
3050 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3051 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3052 // ...and the inverse of that.
3053 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3054 else
3055 llvm_unreachable("Unexpected integer comparison type");
3056 C.CCMask &= CCValid;
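// (E.g. Cond == SETEQ with CC == 1 gives CCMask == 1 << 2, i.e. SystemZ::CCMASK_1, before the & with CCValid.)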
3057 return C;
3058}
3059
3060// Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3061static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3062 ISD::CondCode Cond, const SDLoc &DL,
3063 SDValue Chain = SDValue(),
3064 bool IsSignaling = false) {
3065 if (CmpOp1.getOpcode() == ISD::Constant) {
3066 assert(!Chain);
3067 unsigned Opcode, CCValid;
3068 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3069 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3070 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3071 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3072 CmpOp1->getAsZExtVal(), Cond);
3073 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3074 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3075 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3076 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3077 CmpOp1->getAsZExtVal(), Cond);
3078 }
3079 Comparison C(CmpOp0, CmpOp1, Chain);
3080 C.CCMask = CCMaskForCondCode(Cond);
3081 if (C.Op0.getValueType().isFloatingPoint()) {
3082 C.CCValid = SystemZ::CCMASK_FCMP;
3083 if (!C.Chain)
3084 C.Opcode = SystemZISD::FCMP;
3085 else if (!IsSignaling)
3086 C.Opcode = SystemZISD::STRICT_FCMP;
3087 else
3088 C.Opcode = SystemZISD::STRICT_FCMPS;
3089 adjustForFNeg(C);
3090 } else {
3091 assert(!C.Chain);
3092 C.CCValid = SystemZ::CCMASK_ICMP;
3093 C.Opcode = SystemZISD::ICMP;
3094 // Choose the type of comparison. Equality and inequality tests can
3095 // use either signed or unsigned comparisons. The choice also doesn't
3096 // matter if both sign bits are known to be clear. In those cases we
3097 // want to give the main isel code the freedom to choose whichever
3098 // form fits best.
3099 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3100 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3101 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3102 C.ICmpType = SystemZICMP::Any;
3103 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3104 C.ICmpType = SystemZICMP::UnsignedOnly;
3105 else
3106 C.ICmpType = SystemZICMP::SignedOnly;
3107 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3108 adjustForRedundantAnd(DAG, DL, C);
3109 adjustZeroCmp(DAG, DL, C);
3110 adjustSubwordCmp(DAG, DL, C);
3111 adjustForSubtraction(DAG, DL, C);
3112 adjustForLTGFR(C);
3113 adjustICmpTruncate(DAG, DL, C);
3114 }
3115
3116 if (shouldSwapCmpOperands(C)) {
3117 std::swap(C.Op0, C.Op1);
3118 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3119 }
3120
3121 adjustForTestUnderMask(DAG, DL, C);
3122 adjustICmp128(DAG, DL, C);
3123 return C;
3124}
3125
3126// Emit the comparison instruction described by C.
3127static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3128 if (!C.Op1.getNode()) {
3129 SDNode *Node;
3130 switch (C.Op0.getOpcode()) {
3131 case ISD::INTRINSIC_W_CHAIN:
3132 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3133 return SDValue(Node, 0);
3134 case ISD::INTRINSIC_WO_CHAIN:
3135 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3136 return SDValue(Node, Node->getNumValues() - 1);
3137 default:
3138 llvm_unreachable("Invalid comparison operands");
3139 }
3140 }
3141 if (C.Opcode == SystemZISD::ICMP)
3142 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3143 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3144 if (C.Opcode == SystemZISD::TM) {
3145 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3146 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
3147 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3148 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3149 }
3150 if (C.Opcode == SystemZISD::VICMPES) {
3151 SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32);
3152 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3153 return SDValue(Val.getNode(), 1);
3154 }
3155 if (C.Chain) {
3156 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3157 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3158 }
3159 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3160}
3161
3162// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3163// 64 bits. Extend is the extension type to use. Store the high part
3164// in Hi and the low part in Lo.
3165static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3166 SDValue Op0, SDValue Op1, SDValue &Hi,
3167 SDValue &Lo) {
3168 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3169 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3170 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3171 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3172 DAG.getConstant(32, DL, MVT::i64));
3173 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3174 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3175}
3176
3177// Lower a binary operation that produces two VT results, one in each
3178// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3179// and Opcode performs the GR128 operation. Store the even register result
3180// in Even and the odd register result in Odd.
3181static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3182 unsigned Opcode, SDValue Op0, SDValue Op1,
3183 SDValue &Even, SDValue &Odd) {
3184 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3185 bool Is32Bit = is32Bit(VT);
3186 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3187 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3188}
3189
3190// Return an i32 value that is 1 if the CC value produced by CCReg is
3191// in the mask CCMask and 0 otherwise. CC is known to have a value
3192// in CCValid, so other values can be ignored.
3193static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3194 unsigned CCValid, unsigned CCMask) {
3195 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3196 DAG.getConstant(0, DL, MVT::i32),
3197 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3198 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3199 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3200}
3201
3202// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3203// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3204// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3205// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3206// floating-point comparisons.
3207 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
3208 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
3209 switch (CC) {
3210 case ISD::SETOEQ:
3211 case ISD::SETEQ:
3212 switch (Mode) {
3213 case CmpMode::Int: return SystemZISD::VICMPE;
3214 case CmpMode::FP: return SystemZISD::VFCMPE;
3215 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3216 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3217 }
3218 llvm_unreachable("Bad mode");
3219
3220 case ISD::SETOGE:
3221 case ISD::SETGE:
3222 switch (Mode) {
3223 case CmpMode::Int: return 0;
3224 case CmpMode::FP: return SystemZISD::VFCMPHE;
3225 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3226 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3227 }
3228 llvm_unreachable("Bad mode");
3229
3230 case ISD::SETOGT:
3231 case ISD::SETGT:
3232 switch (Mode) {
3233 case CmpMode::Int: return SystemZISD::VICMPH;
3234 case CmpMode::FP: return SystemZISD::VFCMPH;
3235 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3236 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3237 }
3238 llvm_unreachable("Bad mode");
3239
3240 case ISD::SETUGT:
3241 switch (Mode) {
3242 case CmpMode::Int: return SystemZISD::VICMPHL;
3243 case CmpMode::FP: return 0;
3244 case CmpMode::StrictFP: return 0;
3245 case CmpMode::SignalingFP: return 0;
3246 }
3247 llvm_unreachable("Bad mode");
3248
3249 default:
3250 return 0;
3251 }
3252}
3253
3254// Return the SystemZISD vector comparison operation for CC or its inverse,
3255// or 0 if neither can be done directly. Indicate in Invert whether the
3256// result is for the inverse of CC. Mode is as above.
3257 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3258 bool &Invert) {
3259 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3260 Invert = false;
3261 return Opcode;
3262 }
3263
3264 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3265 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3266 Invert = true;
3267 return Opcode;
3268 }
3269
3270 return 0;
3271}
3272
3273// Return a v2f64 that contains the extended form of elements Start and Start+1
3274// of v4f32 value Op. If Chain is nonnull, return the strict form.
3275static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3276 SDValue Op, SDValue Chain) {
3277 int Mask[] = { Start, -1, Start + 1, -1 };
3278 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3279 if (Chain) {
3280 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3281 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3282 }
3283 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3284}
3285
3286// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3287// producing a result of type VT. If Chain is nonnull, return the strict form.
3288SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3289 const SDLoc &DL, EVT VT,
3290 SDValue CmpOp0,
3291 SDValue CmpOp1,
3292 SDValue Chain) const {
3293 // There is no hardware support for v4f32 (unless we have the vector
3294 // enhancements facility 1), so extend the vector into two v2f64s
3295 // and compare those.
3296 if (CmpOp0.getValueType() == MVT::v4f32 &&
3297 !Subtarget.hasVectorEnhancements1()) {
3298 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3299 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3300 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3301 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3302 if (Chain) {
3303 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3304 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3305 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3306 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3307 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3308 H1.getValue(1), L1.getValue(1),
3309 HRes.getValue(1), LRes.getValue(1) };
3310 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3311 SDValue Ops[2] = { Res, NewChain };
3312 return DAG.getMergeValues(Ops, DL);
3313 }
3314 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3315 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3316 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3317 }
3318 if (Chain) {
3319 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3320 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3321 }
3322 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3323}
3324
3325// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3326// an integer mask of type VT. If Chain is nonnull, we have a strict
3327// floating-point comparison. If in addition IsSignaling is true, we have
3328// a strict signaling floating-point comparison.
3329SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3330 const SDLoc &DL, EVT VT,
3331 ISD::CondCode CC,
3332 SDValue CmpOp0,
3333 SDValue CmpOp1,
3334 SDValue Chain,
3335 bool IsSignaling) const {
3336 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3337 assert (!Chain || IsFP);
3338 assert (!IsSignaling || Chain);
3339 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3340 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3341 bool Invert = false;
3342 SDValue Cmp;
3343 switch (CC) {
3344 // Handle tests for order using (or (ogt y x) (oge x y)).
3345 case ISD::SETUO:
3346 Invert = true;
3347 [[fallthrough]];
3348 case ISD::SETO: {
3349 assert(IsFP && "Unexpected integer comparison");
3350 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3351 DL, VT, CmpOp1, CmpOp0, Chain);
3352 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3353 DL, VT, CmpOp0, CmpOp1, Chain);
3354 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3355 if (Chain)
3356 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3357 LT.getValue(1), GE.getValue(1));
3358 break;
3359 }
3360
3361 // Handle <> tests using (or (ogt y x) (ogt x y)).
3362 case ISD::SETUEQ:
3363 Invert = true;
3364 [[fallthrough]];
3365 case ISD::SETONE: {
3366 assert(IsFP && "Unexpected integer comparison");
3367 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3368 DL, VT, CmpOp1, CmpOp0, Chain);
3369 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3370 DL, VT, CmpOp0, CmpOp1, Chain);
3371 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3372 if (Chain)
3373 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3374 LT.getValue(1), GT.getValue(1));
3375 break;
3376 }
3377
3378 // Otherwise a single comparison is enough. It doesn't really
3379 // matter whether we try the inversion or the swap first, since
3380 // there are no cases where both work.
3381 default:
3382 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3383 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3384 else {
3385 CC = ISD::getSetCCSwappedOperands(CC);
3386 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3387 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3388 else
3389 llvm_unreachable("Unhandled comparison");
3390 }
3391 if (Chain)
3392 Chain = Cmp.getValue(1);
3393 break;
3394 }
3395 if (Invert) {
3396 SDValue Mask =
3397 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3398 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3399 }
3400 if (Chain && Chain.getNode() != Cmp.getNode()) {
3401 SDValue Ops[2] = { Cmp, Chain };
3402 Cmp = DAG.getMergeValues(Ops, DL);
3403 }
3404 return Cmp;
3405}
3406
3407SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3408 SelectionDAG &DAG) const {
3409 SDValue CmpOp0 = Op.getOperand(0);
3410 SDValue CmpOp1 = Op.getOperand(1);
3411 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3412 SDLoc DL(Op);
3413 EVT VT = Op.getValueType();
3414 if (VT.isVector())
3415 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3416
3417 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3418 SDValue CCReg = emitCmp(DAG, DL, C);
3419 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3420}
3421
3422SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3423 SelectionDAG &DAG,
3424 bool IsSignaling) const {
3425 SDValue Chain = Op.getOperand(0);
3426 SDValue CmpOp0 = Op.getOperand(1);
3427 SDValue CmpOp1 = Op.getOperand(2);
3428 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3429 SDLoc DL(Op);
3430 EVT VT = Op.getNode()->getValueType(0);
3431 if (VT.isVector()) {
3432 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3433 Chain, IsSignaling);
3434 return Res.getValue(Op.getResNo());
3435 }
3436
3437 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3438 SDValue CCReg = emitCmp(DAG, DL, C);
3439 CCReg->setFlags(Op->getFlags());
3440 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3441 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3442 return DAG.getMergeValues(Ops, DL);
3443}
3444
3445SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3446 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3447 SDValue CmpOp0 = Op.getOperand(2);
3448 SDValue CmpOp1 = Op.getOperand(3);
3449 SDValue Dest = Op.getOperand(4);
3450 SDLoc DL(Op);
3451
3452 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3453 SDValue CCReg = emitCmp(DAG, DL, C);
3454 return DAG.getNode(
3455 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3456 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3457 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3458}
3459
3460// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3461// allowing Pos and Neg to be wider than CmpOp.
3462static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3463 return (Neg.getOpcode() == ISD::SUB &&
3464 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3465 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3466 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3467 Pos.getOperand(0) == CmpOp)));
3468}
3469
3470// Return the absolute or negative absolute of Op; IsNegative decides which.
3471 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3472 bool IsNegative) {
3473 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3474 if (IsNegative)
3475 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3476 DAG.getConstant(0, DL, Op.getValueType()), Op);
3477 return Op;
3478}
3479
3480SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3481 SelectionDAG &DAG) const {
3482 SDValue CmpOp0 = Op.getOperand(0);
3483 SDValue CmpOp1 = Op.getOperand(1);
3484 SDValue TrueOp = Op.getOperand(2);
3485 SDValue FalseOp = Op.getOperand(3);
3486 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3487 SDLoc DL(Op);
3488
3489 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3490
3491 // Check for absolute and negative-absolute selections, including those
3492 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3493 // This check supplements the one in DAGCombiner.
3494 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3495 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3496 C.Op1.getOpcode() == ISD::Constant &&
3497 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3498 C.Op1->getAsZExtVal() == 0) {
3499 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3500 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3501 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3502 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3503 }
3504
3505 SDValue CCReg = emitCmp(DAG, DL, C);
3506 SDValue Ops[] = {TrueOp, FalseOp,
3507 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3508 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3509
3510 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3511}
3512
3513SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3514 SelectionDAG &DAG) const {
3515 SDLoc DL(Node);
3516 const GlobalValue *GV = Node->getGlobal();
3517 int64_t Offset = Node->getOffset();
3518 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3519 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3520
3521 SDValue Result;
3522 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3523 if (isInt<32>(Offset)) {
3524 // Assign anchors at 1<<12 byte boundaries.
3525 uint64_t Anchor = Offset & ~uint64_t(0xfff);
3526 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3527 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3528
3529 // The offset can be folded into the address if it is aligned to a
3530 // halfword.
3531 Offset -= Anchor;
3532 if (Offset != 0 && (Offset & 1) == 0) {
3533 SDValue Full =
3534 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3535 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3536 Offset = 0;
3537 }
3538 } else {
3539 // Conservatively load a constant offset greater than 32 bits into a
3540 // register below.
3541 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3542 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3543 }
3544 } else if (Subtarget.isTargetELF()) {
3545 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3546 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3547 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3548 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3549 } else if (Subtarget.isTargetzOS()) {
3550 Result = getADAEntry(DAG, GV, DL, PtrVT);
3551 } else
3552 llvm_unreachable("Unexpected Subtarget");
3553
3554 // If there was a non-zero offset that we didn't fold, create an explicit
3555 // addition for it.
3556 if (Offset != 0)
3557 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3558 DAG.getConstant(Offset, DL, PtrVT));
3559
3560 return Result;
3561}
3562
3563SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3564 SelectionDAG &DAG,
3565 unsigned Opcode,
3566 SDValue GOTOffset) const {
3567 SDLoc DL(Node);
3568 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3569 SDValue Chain = DAG.getEntryNode();
3570 SDValue Glue;
3571
3572 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3573 CallingConv::GHC)
3574 report_fatal_error("In GHC calling convention TLS is not supported");
3575
3576 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3577 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3578 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3579 Glue = Chain.getValue(1);
3580 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3581 Glue = Chain.getValue(1);
3582
3583 // The first call operand is the chain and the second is the TLS symbol.
3584 SmallVector<SDValue, 8> Ops;
3585 Ops.push_back(Chain);
3586 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3587 Node->getValueType(0),
3588 0, 0));
3589
3590 // Add argument registers to the end of the list so that they are
3591 // known live into the call.
3592 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3593 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3594
3595 // Add a register mask operand representing the call-preserved registers.
3596 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3597 const uint32_t *Mask =
3598 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3599 assert(Mask && "Missing call preserved mask for calling convention");
3600 Ops.push_back(DAG.getRegisterMask(Mask));
3601
3602 // Glue the call to the argument copies.
3603 Ops.push_back(Glue);
3604
3605 // Emit the call.
3606 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3607 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3608 Glue = Chain.getValue(1);
3609
3610 // Copy the return value from %r2.
3611 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3612}
3613
3614SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3615 SelectionDAG &DAG) const {
3616 SDValue Chain = DAG.getEntryNode();
3617 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3618
3619 // The high part of the thread pointer is in access register 0.
3620 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3621 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3622
3623 // The low part of the thread pointer is in access register 1.
3624 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3625 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3626
3627 // Merge them into a single 64-bit address.
3628 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3629 DAG.getConstant(32, DL, PtrVT));
3630 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3631}
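
// In effect this computes (an illustrative sketch):
//   TP = ((uint64_t)%a0 << 32) | (uint32_t)%a1;
// The 64-bit thread pointer is kept split across access registers a0 (high
// word) and a1 (low word); the CopyFromReg nodes are normally selected to EAR
// instructions that extract those registers.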
3632
3633SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3634 SelectionDAG &DAG) const {
3635 if (DAG.getTarget().useEmulatedTLS())
3636 return LowerToTLSEmulatedModel(Node, DAG);
3637 SDLoc DL(Node);
3638 const GlobalValue *GV = Node->getGlobal();
3639 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3640 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3641
3642 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3643 CallingConv::GHC)
3644 report_fatal_error("In GHC calling convention TLS is not supported");
3645
3646 SDValue TP = lowerThreadPointer(DL, DAG);
3647
3648 // Get the offset of GA from the thread pointer, based on the TLS model.
3649 SDValue Offset;
3650 switch (model) {
3651 case TLSModel::GeneralDynamic: {
3652 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3653 SystemZConstantPoolValue *CPV =
3654 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3655
3656 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3657 Offset = DAG.getLoad(
3658 PtrVT, DL, DAG.getEntryNode(), Offset,
3659 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3660
3661 // Call __tls_get_offset to retrieve the offset.
3662 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3663 break;
3664 }
3665
3666 case TLSModel::LocalDynamic: {
3667 // Load the GOT offset of the module ID.
3668 SystemZConstantPoolValue *CPV =
3669 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3670
3671 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3672 Offset = DAG.getLoad(
3673 PtrVT, DL, DAG.getEntryNode(), Offset,
3674 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3675
3676 // Call __tls_get_offset to retrieve the module base offset.
3677 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3678
3679 // Note: The SystemZLDCleanupPass will remove redundant computations
3680 // of the module base offset. Count total number of local-dynamic
3681 // accesses to trigger execution of that pass.
3682 SystemZMachineFunctionInfo* MFI =
3683 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3684 MFI->incNumLocalDynamicTLSAccesses();
3685
3686 // Add the per-symbol offset.
3687 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3688
3689 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3690 DTPOffset = DAG.getLoad(
3691 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3692 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3693
3694 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3695 break;
3696 }
3697
3698 case TLSModel::InitialExec: {
3699 // Load the offset from the GOT.
3700 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3701 SystemZII::MO_INDNTPOFF);
3702 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3703 Offset =
3704 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3705 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3706 break;
3707 }
3708
3709 case TLSModel::LocalExec: {
3710 // Force the offset into the constant pool and load it from there.
3711 SystemZConstantPoolValue *CPV =
3712 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3713
3714 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3715 Offset = DAG.getLoad(
3716 PtrVT, DL, DAG.getEntryNode(), Offset,
3717 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3718 break;
3719 }
3720 }
3721
3722 // Add the base and offset together.
3723 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3724}
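
// Descriptive summary of the per-model offsets above: GeneralDynamic loads a
// TLSGD constant-pool entry and calls __tls_get_offset (TLS_GDCALL);
// LocalDynamic calls __tls_get_offset (TLS_LDCALL) for the module base and
// adds a DTPOFF entry; InitialExec loads the symbol's NTPOFF value from the
// GOT; LocalExec loads an NTPOFF constant-pool entry directly. In every case
// the final address is thread-pointer + Offset.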
3725
3726SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3727 SelectionDAG &DAG) const {
3728 SDLoc DL(Node);
3729 const BlockAddress *BA = Node->getBlockAddress();
3730 int64_t Offset = Node->getOffset();
3731 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3732
3733 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3734 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3735 return Result;
3736}
3737
3738SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3739 SelectionDAG &DAG) const {
3740 SDLoc DL(JT);
3741 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3742 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3743
3744 // Use LARL to load the address of the table.
3745 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3746}
3747
3748SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3749 SelectionDAG &DAG) const {
3750 SDLoc DL(CP);
3751 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3752
3753 SDValue Result;
3754 if (CP->isMachineConstantPoolEntry())
3755 Result =
3756 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3757 else
3758 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3759 CP->getOffset());
3760
3761 // Use LARL to load the address of the constant pool entry.
3762 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3763}
3764
3765SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3766 SelectionDAG &DAG) const {
3767 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
3768 MachineFunction &MF = DAG.getMachineFunction();
3769 MachineFrameInfo &MFI = MF.getFrameInfo();
3770 MFI.setFrameAddressIsTaken(true);
3771
3772 SDLoc DL(Op);
3773 unsigned Depth = Op.getConstantOperandVal(0);
3774 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3775
3776 // By definition, the frame address is the address of the back chain. (In
3777 // the case of packed stack without backchain, return the address where the
3778 // backchain would have been stored. This will either be an unused space or
3779 // contain a saved register).
3780 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3781 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3782
3783 if (Depth > 0) {
3784 // FIXME The frontend should detect this case.
3785 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3786 report_fatal_error("Unsupported stack frame traversal count");
3787
3788 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
3789 while (Depth--) {
3790 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
3791 MachinePointerInfo());
3792 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
3793 }
3794 }
3795
3796 return BackChain;
3797}
3798
3799SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3800 SelectionDAG &DAG) const {
3801 MachineFunction &MF = DAG.getMachineFunction();
3802 MachineFrameInfo &MFI = MF.getFrameInfo();
3803 MFI.setReturnAddressIsTaken(true);
3804
3805 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3806 return SDValue();
3807
3808 SDLoc DL(Op);
3809 unsigned Depth = Op.getConstantOperandVal(0);
3810 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3811
3812 if (Depth > 0) {
3813 // FIXME The frontend should detect this case.
3814 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3815 report_fatal_error("Unsupported stack frame traversal count");
3816
3817 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3818 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
3819 int Offset = TFL->getReturnAddressOffset(MF);
3820 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
3821 DAG.getConstant(Offset, DL, PtrVT));
3822 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
3823 MachinePointerInfo());
3824 }
3825
3826 // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an
3827 // implicit live-in.
3828 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
3829 Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(),
3830 &SystemZ::GR64BitRegClass);
3831 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3832}
3833
3834SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3835 SelectionDAG &DAG) const {
3836 SDLoc DL(Op);
3837 SDValue In = Op.getOperand(0);
3838 EVT InVT = In.getValueType();
3839 EVT ResVT = Op.getValueType();
3840
3841 // Convert loads directly. This is normally done by DAGCombiner,
3842 // but we need this case for bitcasts that are created during lowering
3843 // and which are then lowered themselves.
3844 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3845 if (ISD::isNormalLoad(LoadN)) {
3846 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3847 LoadN->getBasePtr(), LoadN->getMemOperand());
3848 // Update the chain uses.
3849 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3850 return NewLoad;
3851 }
3852
3853 if (InVT == MVT::i32 && ResVT == MVT::f32) {
3854 SDValue In64;
3855 if (Subtarget.hasHighWord()) {
3856 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3857 MVT::i64);
3858 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3859 MVT::i64, SDValue(U64, 0), In);
3860 } else {
3861 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3862 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3863 DAG.getConstant(32, DL, MVT::i64));
3864 }
3865 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3866 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3867 DL, MVT::f32, Out64);
3868 }
3869 if (InVT == MVT::f32 && ResVT == MVT::i32) {
3870 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3871 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3872 MVT::f64, SDValue(U64, 0), In);
3873 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3874 if (Subtarget.hasHighWord())
3875 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3876 MVT::i32, Out64);
3877 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3878 DAG.getConstant(32, DL, MVT::i64));
3879 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3880 }
3881 llvm_unreachable("Unexpected bitcast combination");
3882}
3883
3884SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3885 SelectionDAG &DAG) const {
3886
3887 if (Subtarget.isTargetXPLINK64())
3888 return lowerVASTART_XPLINK(Op, DAG);
3889 else
3890 return lowerVASTART_ELF(Op, DAG);
3891}
3892
3893SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
3894 SelectionDAG &DAG) const {
3895 MachineFunction &MF = DAG.getMachineFunction();
3896 SystemZMachineFunctionInfo *FuncInfo =
3897 MF.getInfo<SystemZMachineFunctionInfo>();
3898
3899 SDLoc DL(Op);
3900
3901 // vastart just stores the address of the VarArgsFrameIndex slot into the
3902 // memory location argument.
3903 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3904 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3905 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3906 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
3907 MachinePointerInfo(SV));
3908}
3909
3910SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
3911 SelectionDAG &DAG) const {
3912 MachineFunction &MF = DAG.getMachineFunction();
3913 SystemZMachineFunctionInfo *FuncInfo =
3914 MF.getInfo<SystemZMachineFunctionInfo>();
3915 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3916
3917 SDValue Chain = Op.getOperand(0);
3918 SDValue Addr = Op.getOperand(1);
3919 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3920 SDLoc DL(Op);
3921
3922 // The initial values of each field.
3923 const unsigned NumFields = 4;
3924 SDValue Fields[NumFields] = {
3925 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3926 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3927 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3928 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3929 };
3930
3931 // Store each field into its respective slot.
3932 SDValue MemOps[NumFields];
3933 unsigned Offset = 0;
3934 for (unsigned I = 0; I < NumFields; ++I) {
3935 SDValue FieldAddr = Addr;
3936 if (Offset != 0)
3937 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3938 DAG.getIntPtrConstant(Offset, DL));
3939 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3940 MachinePointerInfo(SV, Offset));
3941 Offset += 8;
3942 }
3943 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3944}
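
// For reference, the four 8-byte fields stored above match the s390x ELF
// va_list layout (field names follow the ABI, not this file):
//   struct __va_list_tag {
//     long __gpr;                // count of named GPR arguments already used
//     long __fpr;                // count of named FPR arguments already used
//     void *__overflow_arg_area; // start of arguments passed on the stack
//     void *__reg_save_area;     // saved register area in the frame
//   };
// which is why the store loop advances Offset by 8 each iteration.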
3945
3946SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3947 SelectionDAG &DAG) const {
3948 SDValue Chain = Op.getOperand(0);
3949 SDValue DstPtr = Op.getOperand(1);
3950 SDValue SrcPtr = Op.getOperand(2);
3951 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3952 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3953 SDLoc DL(Op);
3954
3955 uint32_t Sz =
3956 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
3957 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
3958 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3959 /*isTailCall*/ false, MachinePointerInfo(DstSV),
3960 MachinePointerInfo(SrcSV));
3961}
3962
3963SDValue
3964SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
3965 SelectionDAG &DAG) const {
3966 if (Subtarget.isTargetXPLINK64())
3967 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
3968 else
3969 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
3970}
3971
3972SDValue
3973SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
3974 SelectionDAG &DAG) const {
3975 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3976 MachineFunction &MF = DAG.getMachineFunction();
3977 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3978 SDValue Chain = Op.getOperand(0);
3979 SDValue Size = Op.getOperand(1);
3980 SDValue Align = Op.getOperand(2);
3981 SDLoc DL(Op);
3982
3983 // If user has set the no alignment function attribute, ignore
3984 // alloca alignments.
3985 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
3986
3987 uint64_t StackAlign = TFI->getStackAlignment();
3988 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3989 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3990
3991 SDValue NeededSpace = Size;
3992
3993 // Add extra space for alignment if needed.
3994 EVT PtrVT = getPointerTy(MF.getDataLayout());
3995 if (ExtraAlignSpace)
3996 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
3997 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3998
3999 bool IsSigned = false;
4000 bool DoesNotReturn = false;
4001 bool IsReturnValueUsed = false;
4002 EVT VT = Op.getValueType();
4003 SDValue AllocaCall =
4004 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4005 CallingConv::C, IsSigned, DL, DoesNotReturn,
4006 IsReturnValueUsed)
4007 .first;
4008
4009 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4010 // to end of call in order to ensure it isn't broken up from the call
4011 // sequence.
4012 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4013 Register SPReg = Regs.getStackPointerRegister();
4014 Chain = AllocaCall.getValue(1);
4015 SDValue Glue = AllocaCall.getValue(2);
4016 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4017 Chain = NewSPRegNode.getValue(1);
4018
4019 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4020 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4021 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4022
4023 // Dynamically realign if needed.
4024 if (ExtraAlignSpace) {
4025 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4026 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4027 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4028 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4029 }
4030
4031 SDValue Ops[2] = {Result, Chain};
4032 return DAG.getMergeValues(Ops, DL);
4033}
4034
4035SDValue
4036SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4037 SelectionDAG &DAG) const {
4038 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4039 MachineFunction &MF = DAG.getMachineFunction();
4040 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4041 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4042
4043 SDValue Chain = Op.getOperand(0);
4044 SDValue Size = Op.getOperand(1);
4045 SDValue Align = Op.getOperand(2);
4046 SDLoc DL(Op);
4047
4048 // If user has set the no alignment function attribute, ignore
4049 // alloca alignments.
4050 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4051
4052 uint64_t StackAlign = TFI->getStackAlignment();
4053 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4054 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4055
4056 Register SPReg = getStackPointerRegisterToSaveRestore();
4057 SDValue NeededSpace = Size;
4058
4059 // Get a reference to the stack pointer.
4060 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4061
4062 // If we need a backchain, save it now.
4063 SDValue Backchain;
4064 if (StoreBackchain)
4065 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4066 MachinePointerInfo());
4067
4068 // Add extra space for alignment if needed.
4069 if (ExtraAlignSpace)
4070 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4071 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4072
4073 // Get the new stack pointer value.
4074 SDValue NewSP;
4075 if (hasInlineStackProbe(MF)) {
4076 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4077 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4078 Chain = NewSP.getValue(1);
4079 }
4080 else {
4081 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4082 // Copy the new stack pointer back.
4083 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4084 }
4085
4086 // The allocated data lives above the 160 bytes allocated for the standard
4087 // frame, plus any outgoing stack arguments. We don't know how much that
4088 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4089 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4090 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4091
4092 // Dynamically realign if needed.
4093 if (RequiredAlign > StackAlign) {
4094 Result =
4095 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4096 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4097 Result =
4098 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4099 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4100 }
4101
4102 if (StoreBackchain)
4103 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4104 MachinePointerInfo());
4105
4106 SDValue Ops[2] = { Result, Chain };
4107 return DAG.getMergeValues(Ops, DL);
4108}
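
// Worked example of the realignment above (illustrative numbers): with the
// usual 8-byte StackAlign and an alloca requesting RequiredAlign = 64,
// ExtraAlignSpace = 56 extra bytes are allocated; the result pointer is then
// advanced by 56 and masked with ~63, which is guaranteed to yield a 64-byte
// aligned address inside the over-allocated region.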
4109
4110SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4111 SDValue Op, SelectionDAG &DAG) const {
4112 SDLoc DL(Op);
4113
4114 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4115}
4116
4117SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4118 SelectionDAG &DAG) const {
4119 EVT VT = Op.getValueType();
4120 SDLoc DL(Op);
4121 SDValue Ops[2];
4122 if (is32Bit(VT))
4123 // Just do a normal 64-bit multiplication and extract the results.
4124 // We define this so that it can be used for constant division.
4125 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4126 Op.getOperand(1), Ops[1], Ops[0]);
4127 else if (Subtarget.hasMiscellaneousExtensions2())
4128 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4129 // the high result in the even register. ISD::SMUL_LOHI is defined to
4130 // return the low half first, so the results are in reverse order.
4131 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4132 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4133 else {
4134 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4135 //
4136 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4137 //
4138 // but using the fact that the upper halves are either all zeros
4139 // or all ones:
4140 //
4141 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4142 //
4143 // and grouping the right terms together since they are quicker than the
4144 // multiplication:
4145 //
4146 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
4147 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4148 SDValue LL = Op.getOperand(0);
4149 SDValue RL = Op.getOperand(1);
4150 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4151 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4152 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4153 // the high result in the even register. ISD::SMUL_LOHI is defined to
4154 // return the low half first, so the results are in reverse order.
4155 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4156 LL, RL, Ops[1], Ops[0]);
4157 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4158 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4159 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4160 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4161 }
4162 return DAG.getMergeValues(Ops, DL);
4163}
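
// Sanity check of the identity above (illustrative): the upper halves of the
// sign extensions are lh, rh in {0, -1}, so (lh * rl) equals -(lh & rl) and
// likewise for (ll * rh). For ll = -1, rl = 2, UMUL_LOHI gives high = 1,
// low = -2; NegSum = (lh & rl) + (ll & rh) = 2 + 0, and 1 - 2 = -1 is the
// correct high half of the 128-bit product -2.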
4164
4165SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4166 SelectionDAG &DAG) const {
4167 EVT VT = Op.getValueType();
4168 SDLoc DL(Op);
4169 SDValue Ops[2];
4170 if (is32Bit(VT))
4171 // Just do a normal 64-bit multiplication and extract the results.
4172 // We define this so that it can be used for constant division.
4173 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4174 Op.getOperand(1), Ops[1], Ops[0]);
4175 else
4176 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4177 // the high result in the even register. ISD::UMUL_LOHI is defined to
4178 // return the low half first, so the results are in reverse order.
4179 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4180 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4181 return DAG.getMergeValues(Ops, DL);
4182}
4183
4184SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4185 SelectionDAG &DAG) const {
4186 SDValue Op0 = Op.getOperand(0);
4187 SDValue Op1 = Op.getOperand(1);
4188 EVT VT = Op.getValueType();
4189 SDLoc DL(Op);
4190
4191 // We use DSGF for 32-bit division. This means the first operand must
4192 // always be 64-bit, and the second operand should be 32-bit whenever
4193 // that is possible, to improve performance.
4194 if (is32Bit(VT))
4195 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4196 else if (DAG.ComputeNumSignBits(Op1) > 32)
4197 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4198
4199 // DSG(F) returns the remainder in the even register and the
4200 // quotient in the odd register.
4201 SDValue Ops[2];
4202 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4203 return DAG.getMergeValues(Ops, DL);
4204}
4205
4206SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4207 SelectionDAG &DAG) const {
4208 EVT VT = Op.getValueType();
4209 SDLoc DL(Op);
4210
4211 // DL(G) returns the remainder in the even register and the
4212 // quotient in the odd register.
4213 SDValue Ops[2];
4214 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4215 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4216 return DAG.getMergeValues(Ops, DL);
4217}
4218
4219SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4220 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4221
4222 // Get the known-zero masks for each operand.
4223 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4224 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4225 DAG.computeKnownBits(Ops[1])};
4226
4227 // See if the upper 32 bits of one operand and the lower 32 bits of the
4228 // other are known zero. They are the low and high operands respectively.
4229 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4230 Known[1].Zero.getZExtValue() };
4231 unsigned High, Low;
4232 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4233 High = 1, Low = 0;
4234 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4235 High = 0, Low = 1;
4236 else
4237 return Op;
4238
4239 SDValue LowOp = Ops[Low];
4240 SDValue HighOp = Ops[High];
4241
4242 // If the high part is a constant, we're better off using IILH.
4243 if (HighOp.getOpcode() == ISD::Constant)
4244 return Op;
4245
4246 // If the low part is a constant that is outside the range of LHI,
4247 // then we're better off using IILF.
4248 if (LowOp.getOpcode() == ISD::Constant) {
4249 int64_t Value = int32_t(LowOp->getAsZExtVal());
4250 if (!isInt<16>(Value))
4251 return Op;
4252 }
4253
4254 // Check whether the high part is an AND that doesn't change the
4255 // high 32 bits and just masks out low bits. We can skip it if so.
4256 if (HighOp.getOpcode() == ISD::AND &&
4257 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4258 SDValue HighOp0 = HighOp.getOperand(0);
4259 uint64_t Mask = HighOp.getConstantOperandVal(1);
4260 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4261 HighOp = HighOp0;
4262 }
4263
4264 // Take advantage of the fact that all GR32 operations only change the
4265 // low 32 bits by truncating Low to an i32 and inserting it directly
4266 // using a subreg. The interesting cases are those where the truncation
4267 // can be folded.
4268 SDLoc DL(Op);
4269 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4270 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4271 MVT::i64, HighOp, Low32);
4272}
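
// Example of the pattern this targets (illustrative): for
//   (or i64 (and i64 %x, 0xffffffff00000000), (zext i32 %y to i64))
// the low operand only affects the low 32 bits, so the OR is rewritten as an
// insertion of %y into subreg_l32 of the high operand; that typically selects
// to a 32-bit move, or to IILF when the low part is a constant outside the
// 16-bit LHI range.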
4273
4274// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4275SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4276 SelectionDAG &DAG) const {
4277 SDNode *N = Op.getNode();
4278 SDValue LHS = N->getOperand(0);
4279 SDValue RHS = N->getOperand(1);
4280 SDLoc DL(N);
4281
4282 if (N->getValueType(0) == MVT::i128) {
4283 unsigned BaseOp = 0;
4284 unsigned FlagOp = 0;
4285 bool IsBorrow = false;
4286 switch (Op.getOpcode()) {
4287 default: llvm_unreachable("Unknown instruction!");
4288 case ISD::UADDO:
4289 BaseOp = ISD::ADD;
4290 FlagOp = SystemZISD::VACC;
4291 break;
4292 case ISD::USUBO:
4293 BaseOp = ISD::SUB;
4294 FlagOp = SystemZISD::VSCBI;
4295 IsBorrow = true;
4296 break;
4297 }
4298 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4299 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4300 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4301 DAG.getValueType(MVT::i1));
4302 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4303 if (IsBorrow)
4304 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4305 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4306 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4307 }
4308
4309 unsigned BaseOp = 0;
4310 unsigned CCValid = 0;
4311 unsigned CCMask = 0;
4312
4313 switch (Op.getOpcode()) {
4314 default: llvm_unreachable("Unknown instruction!");
4315 case ISD::SADDO:
4316 BaseOp = SystemZISD::SADDO;
4317 CCValid = SystemZ::CCMASK_ARITH;
4318 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4319 break;
4320 case ISD::SSUBO:
4321 BaseOp = SystemZISD::SSUBO;
4322 CCValid = SystemZ::CCMASK_ARITH;
4323 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4324 break;
4325 case ISD::UADDO:
4326 BaseOp = SystemZISD::UADDO;
4327 CCValid = SystemZ::CCMASK_LOGICAL;
4328 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4329 break;
4330 case ISD::USUBO:
4331 BaseOp = SystemZISD::USUBO;
4332 CCValid = SystemZ::CCMASK_LOGICAL;
4333 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4334 break;
4335 }
4336
4337 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4338 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4339
4340 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4341 if (N->getValueType(1) == MVT::i1)
4342 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4343
4344 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4345}
4346
4347static bool isAddCarryChain(SDValue Carry) {
4348 while (Carry.getOpcode() == ISD::UADDO_CARRY)
4349 Carry = Carry.getOperand(2);
4350 return Carry.getOpcode() == ISD::UADDO;
4351}
4352
4353static bool isSubBorrowChain(SDValue Carry) {
4354 while (Carry.getOpcode() == ISD::USUBO_CARRY)
4355 Carry = Carry.getOperand(2);
4356 return Carry.getOpcode() == ISD::USUBO;
4357}
4358
4359// Lower UADDO_CARRY/USUBO_CARRY nodes.
4360SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4361 SelectionDAG &DAG) const {
4362
4363 SDNode *N = Op.getNode();
4364 MVT VT = N->getSimpleValueType(0);
4365
4366 // Let legalize expand this if it isn't a legal type yet.
4367 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4368 return SDValue();
4369
4370 SDValue LHS = N->getOperand(0);
4371 SDValue RHS = N->getOperand(1);
4372 SDValue Carry = Op.getOperand(2);
4373 SDLoc DL(N);
4374
4375 if (VT == MVT::i128) {
4376 unsigned BaseOp = 0;
4377 unsigned FlagOp = 0;
4378 bool IsBorrow = false;
4379 switch (Op.getOpcode()) {
4380 default: llvm_unreachable("Unknown instruction!");
4381 case ISD::UADDO_CARRY:
4382 BaseOp = SystemZISD::VAC;
4383 FlagOp = SystemZISD::VACCC;
4384 break;
4385 case ISD::USUBO_CARRY:
4386 BaseOp = SystemZISD::VSBI;
4387 FlagOp = SystemZISD::VSBCBI;
4388 IsBorrow = true;
4389 break;
4390 }
4391 if (IsBorrow)
4392 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4393 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4394 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4395 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4396 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4397 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4398 DAG.getValueType(MVT::i1));
4399 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4400 if (IsBorrow)
4401 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4402 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4403 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4404 }
4405
4406 unsigned BaseOp = 0;
4407 unsigned CCValid = 0;
4408 unsigned CCMask = 0;
4409
4410 switch (Op.getOpcode()) {
4411 default: llvm_unreachable("Unknown instruction!");
4412 case ISD::UADDO_CARRY:
4413 if (!isAddCarryChain(Carry))
4414 return SDValue();
4415
4416 BaseOp = SystemZISD::ADDCARRY;
4417 CCValid = SystemZ::CCMASK_LOGICAL;
4418 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4419 break;
4420 case ISD::USUBO_CARRY:
4421 if (!isSubBorrowChain(Carry))
4422 return SDValue();
4423
4424 BaseOp = SystemZISD::SUBCARRY;
4425 CCValid = SystemZ::CCMASK_LOGICAL;
4426 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4427 break;
4428 }
4429
4430 // Set the condition code from the carry flag.
4431 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4432 DAG.getConstant(CCValid, DL, MVT::i32),
4433 DAG.getConstant(CCMask, DL, MVT::i32));
4434
4435 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4436 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4437
4438 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4439 if (N->getValueType(1) == MVT::i1)
4440 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4441
4442 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4443}
4444
4445SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4446 SelectionDAG &DAG) const {
4447 EVT VT = Op.getValueType();
4448 SDLoc DL(Op);
4449 Op = Op.getOperand(0);
4450
4451 if (VT.getScalarSizeInBits() == 128) {
4452 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4453 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4454 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4455 DAG.getConstant(0, DL, MVT::i64));
4456 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4457 return Op;
4458 }
4459
4460 // Handle vector types via VPOPCT.
4461 if (VT.isVector()) {
4462 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4463 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4464 switch (VT.getScalarSizeInBits()) {
4465 case 8:
4466 break;
4467 case 16: {
4468 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4469 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4470 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4471 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4472 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4473 break;
4474 }
4475 case 32: {
4476 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4477 DAG.getConstant(0, DL, MVT::i32));
4478 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4479 break;
4480 }
4481 case 64: {
4482 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4483 DAG.getConstant(0, DL, MVT::i32));
4484 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4485 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4486 break;
4487 }
4488 default:
4489 llvm_unreachable("Unexpected type");
4490 }
4491 return Op;
4492 }
4493
4494 // Get the known-zero mask for the operand.
4495 KnownBits Known = DAG.computeKnownBits(Op);
4496 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
4497 if (NumSignificantBits == 0)
4498 return DAG.getConstant(0, DL, VT);
4499
4500 // Skip known-zero high parts of the operand.
4501 int64_t OrigBitSize = VT.getSizeInBits();
4502 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
4503 BitSize = std::min(BitSize, OrigBitSize);
4504
4505 // The POPCNT instruction counts the number of bits in each byte.
4506 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
4507 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
4508 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4509
4510 // Add up per-byte counts in a binary tree. All bits of Op at
4511 // position larger than BitSize remain zero throughout.
4512 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
4513 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
4514 if (BitSize != OrigBitSize)
4515 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
4516 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
4517 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4518 }
4519
4520 // Extract overall result from high byte.
4521 if (BitSize > 8)
4522 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4523 DAG.getConstant(BitSize - 8, DL, VT));
4524
4525 return Op;
4526}
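
// Reference sketch of the scalar expansion above (illustrative): for an i32
// operand with no known-zero bits it computes, in effect,
//   b = popcnt_per_byte(x);  // POPCNT leaves each byte's own population count
//   b += b << 16;            // loop iteration I = 16
//   b += b << 8;             // loop iteration I = 8
//   result = b >> 24;        // BitSize - 8: the total ends up in the top byte
// The extra AND in the loop only appears when known-zero high bits let
// BitSize shrink below the original width.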
4527
4528SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4529 SelectionDAG &DAG) const {
4530 SDLoc DL(Op);
4531 AtomicOrdering FenceOrdering =
4532 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4533 SyncScope::ID FenceSSID =
4534 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4535
4536 // The only fence that needs an instruction is a sequentially-consistent
4537 // cross-thread fence.
4538 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4539 FenceSSID == SyncScope::System) {
4540 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
4541 Op.getOperand(0)),
4542 0);
4543 }
4544
4545 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4546 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4547}
4548
4549SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
4550 SelectionDAG &DAG) const {
4551 auto *Node = cast<AtomicSDNode>(Op.getNode());
4552 assert(
4553 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
4554 "Only custom lowering i128 or f128.");
4555 // Use same code to handle both legal and non-legal i128 types.
4556 SmallVector<SDValue, 2> Results;
4557 LowerOperationWrapper(Node, Results, DAG);
4558 return DAG.getMergeValues(Results, SDLoc(Op));
4559}
4560
4561// Prepare for a Compare And Swap for a subword operation. This needs to be
4562// done in memory with 4 bytes at natural alignment.
4563 static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
4564 SDValue &AlignedAddr, SDValue &BitShift,
4565 SDValue &NegBitShift) {
4566 EVT PtrVT = Addr.getValueType();
4567 EVT WideVT = MVT::i32;
4568
4569 // Get the address of the containing word.
4570 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4571 DAG.getConstant(-4, DL, PtrVT));
4572
4573 // Get the number of bits that the word must be rotated left in order
4574 // to bring the field to the top bits of a GR32.
4575 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4576 DAG.getConstant(3, DL, PtrVT));
4577 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4578
4579 // Get the complementing shift amount, for rotating a field in the top
4580 // bits back to its proper position.
4581 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4582 DAG.getConstant(0, DL, WideVT), BitShift);
4583
4584}
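
// Worked example (illustrative, big-endian): for a 16-bit field at address
// 0x1002, AlignedAddr = 0x1000 and BitShift = (0x1002 << 3) mod 32 = 16, so
// rotating the containing 32-bit word left by 16 brings the field to the top
// bits of a GR32; NegBitShift = -16 rotates it back after the update.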
4585
4586// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
4587// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
4588SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
4589 SelectionDAG &DAG,
4590 unsigned Opcode) const {
4591 auto *Node = cast<AtomicSDNode>(Op.getNode());
4592
4593 // 32-bit operations need no special handling.
4594 EVT NarrowVT = Node->getMemoryVT();
4595 EVT WideVT = MVT::i32;
4596 if (NarrowVT == WideVT)
4597 return Op;
4598
4599 int64_t BitSize = NarrowVT.getSizeInBits();
4600 SDValue ChainIn = Node->getChain();
4601 SDValue Addr = Node->getBasePtr();
4602 SDValue Src2 = Node->getVal();
4603 MachineMemOperand *MMO = Node->getMemOperand();
4604 SDLoc DL(Node);
4605
4606 // Convert atomic subtracts of constants into additions.
4607 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4608 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4609 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
4610 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
4611 }
4612
4613 SDValue AlignedAddr, BitShift, NegBitShift;
4614 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4615
4616 // Extend the source operand to 32 bits and prepare it for the inner loop.
4617 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
4618 // operations require the source to be shifted in advance. (This shift
4619 // can be folded if the source is constant.) For AND and NAND, the lower
4620 // bits must be set, while for other opcodes they should be left clear.
4621 if (Opcode != SystemZISD::ATOMIC_SWAPW)
4622 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
4623 DAG.getConstant(32 - BitSize, DL, WideVT));
4624 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
4625 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
4626 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
4627 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
4628
4629 // Construct the ATOMIC_LOADW_* node.
4630 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
4631 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4632 DAG.getConstant(BitSize, DL, WideVT) };
4633 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4634 NarrowVT, MMO);
4635
4636 // Rotate the result of the final CS so that the field is in the lower
4637 // bits of a GR32, then truncate it.
4638 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
4639 DAG.getConstant(BitSize, DL, WideVT));
4640 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
4641
4642 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
4643 return DAG.getMergeValues(RetOps, DL);
4644}
4645
4646// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
4647// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
4648SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
4649 SelectionDAG &DAG) const {
4650 auto *Node = cast<AtomicSDNode>(Op.getNode());
4651 EVT MemVT = Node->getMemoryVT();
4652 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
4653 // A full-width operation: negate and use LAA(G).
4654 assert(Op.getValueType() == MemVT && "Mismatched VTs");
4655 assert(Subtarget.hasInterlockedAccess1() &&
4656 "Should have been expanded by AtomicExpand pass.");
4657 SDValue Src2 = Node->getVal();
4658 SDLoc DL(Src2);
4659 SDValue NegSrc2 =
4660 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
4661 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
4662 Node->getChain(), Node->getBasePtr(), NegSrc2,
4663 Node->getMemOperand());
4664 }
4665
4666 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
4667}
4668
4669// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
4670SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
4671 SelectionDAG &DAG) const {
4672 auto *Node = cast<AtomicSDNode>(Op.getNode());
4673 SDValue ChainIn = Node->getOperand(0);
4674 SDValue Addr = Node->getOperand(1);
4675 SDValue CmpVal = Node->getOperand(2);
4676 SDValue SwapVal = Node->getOperand(3);
4677 MachineMemOperand *MMO = Node->getMemOperand();
4678 SDLoc DL(Node);
4679
4680 if (Node->getMemoryVT() == MVT::i128) {
4681 // Use same code to handle both legal and non-legal i128 types.
4682 SmallVector<SDValue, 3> Results;
4683 LowerOperationWrapper(Node, Results, DAG);
4684 return DAG.getMergeValues(Results, DL);
4685 }
4686
4687 // We have native support for 32-bit and 64-bit compare and swap, but we
4688 // still need to expand extracting the "success" result from the CC.
4689 EVT NarrowVT = Node->getMemoryVT();
4690 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4691 if (NarrowVT == WideVT) {
4692 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4693 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
4694 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
4695 DL, Tys, Ops, NarrowVT, MMO);
4696 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4697 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
4698
4699 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4700 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4701 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4702 return SDValue();
4703 }
4704
4705 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4706 // via a fullword ATOMIC_CMP_SWAPW operation.
4707 int64_t BitSize = NarrowVT.getSizeInBits();
4708
4709 SDValue AlignedAddr, BitShift, NegBitShift;
4710 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4711
4712 // Construct the ATOMIC_CMP_SWAPW node.
4713 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4714 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
4715 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
4716 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
4717 VTList, Ops, NarrowVT, MMO);
4718 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4719 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
4720
4721 // emitAtomicCmpSwapW() will zero extend the result (original value).
4722 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
4723 DAG.getValueType(NarrowVT));
4724 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
4725 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4726 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4727 return SDValue();
4728}
4729
4730 MachineMemOperand::Flags
4731 SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
4732 // Because of how we convert atomic_load and atomic_store to normal loads and
4733 // stores in the DAG, we need to ensure that the MMOs are marked volatile
4734 // since DAGCombine hasn't been updated to account for atomic, but non
4735 // volatile loads. (See D57601)
4736 if (auto *SI = dyn_cast<StoreInst>(&I))
4737 if (SI->isAtomic())
4738 return MachineMemOperand::MOVolatile;
4739 if (auto *LI = dyn_cast<LoadInst>(&I))
4740 if (LI->isAtomic())
4741 return MachineMemOperand::MOVolatile;
4742 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
4743 if (AI->isAtomic())
4744 return MachineMemOperand::MOVolatile;
4745 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
4746 if (AI->isAtomic())
4747 return MachineMemOperand::MOVolatile;
4748 return MachineMemOperand::MONone;
4749 }
4750
4751SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
4752 SelectionDAG &DAG) const {
4753 MachineFunction &MF = DAG.getMachineFunction();
4754 auto *Regs = Subtarget.getSpecialRegisters();
4755 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4756 report_fatal_error("Variable-sized stack allocations are not supported "
4757 "in GHC calling convention");
4758 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
4759 Regs->getStackPointerRegister(), Op.getValueType());
4760}
4761
4762SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
4763 SelectionDAG &DAG) const {
4764 MachineFunction &MF = DAG.getMachineFunction();
4765 auto *Regs = Subtarget.getSpecialRegisters();
4766 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4767
4768 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4769 report_fatal_error("Variable-sized stack allocations are not supported "
4770 "in GHC calling convention");
4771
4772 SDValue Chain = Op.getOperand(0);
4773 SDValue NewSP = Op.getOperand(1);
4774 SDValue Backchain;
4775 SDLoc DL(Op);
4776
4777 if (StoreBackchain) {
4778 SDValue OldSP = DAG.getCopyFromReg(
4779 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
4780 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4781 MachinePointerInfo());
4782 }
4783
4784 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
4785
4786 if (StoreBackchain)
4787 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4788 MachinePointerInfo());
4789
4790 return Chain;
4791}
4792
4793SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
4794 SelectionDAG &DAG) const {
4795 bool IsData = Op.getConstantOperandVal(4);
4796 if (!IsData)
4797 // Just preserve the chain.
4798 return Op.getOperand(0);
4799
4800 SDLoc DL(Op);
4801 bool IsWrite = Op.getConstantOperandVal(2);
4802 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
4803 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
4804 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
4805 Op.getOperand(1)};
4806 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
4807 Node->getVTList(), Ops,
4808 Node->getMemoryVT(), Node->getMemOperand());
4809}
4810
4811// Convert condition code in CCReg to an i32 value.
4812 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
4813 SDLoc DL(CCReg);
4814 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
4815 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
4816 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
4817}
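
// IPM places the condition code in bits 34-35 of the target register (with
// the program mask below it and zeros above), so shifting the 32-bit result
// right by SystemZ::IPM_CC (28) leaves the raw CC value 0-3. (Descriptive
// note.)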
4818
4819SDValue
4820SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4821 SelectionDAG &DAG) const {
4822 unsigned Opcode, CCValid;
4823 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
4824 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
4825 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
4826 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
4827 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
4828 return SDValue();
4829 }
4830
4831 return SDValue();
4832}
4833
4834SDValue
4835SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4836 SelectionDAG &DAG) const {
4837 unsigned Opcode, CCValid;
4838 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
4839 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
4840 if (Op->getNumValues() == 1)
4841 return getCCResult(DAG, SDValue(Node, 0));
4842 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
4843 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
4844 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
4845 }
4846
4847 unsigned Id = Op.getConstantOperandVal(0);
4848 switch (Id) {
4849 case Intrinsic::thread_pointer:
4850 return lowerThreadPointer(SDLoc(Op), DAG);
4851
4852 case Intrinsic::s390_vpdi:
4853 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
4854 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4855
4856 case Intrinsic::s390_vperm:
4857 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
4858 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4859
4860 case Intrinsic::s390_vuphb:
4861 case Intrinsic::s390_vuphh:
4862 case Intrinsic::s390_vuphf:
4863 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
4864 Op.getOperand(1));
4865
4866 case Intrinsic::s390_vuplhb:
4867 case Intrinsic::s390_vuplhh:
4868 case Intrinsic::s390_vuplhf:
4869 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
4870 Op.getOperand(1));
4871
4872 case Intrinsic::s390_vuplb:
4873 case Intrinsic::s390_vuplhw:
4874 case Intrinsic::s390_vuplf:
4875 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
4876 Op.getOperand(1));
4877
4878 case Intrinsic::s390_vupllb:
4879 case Intrinsic::s390_vupllh:
4880 case Intrinsic::s390_vupllf:
4881 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
4882 Op.getOperand(1));
4883
4884 case Intrinsic::s390_vsumb:
4885 case Intrinsic::s390_vsumh:
4886 case Intrinsic::s390_vsumgh:
4887 case Intrinsic::s390_vsumgf:
4888 case Intrinsic::s390_vsumqf:
4889 case Intrinsic::s390_vsumqg:
4890 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
4891 Op.getOperand(1), Op.getOperand(2));
4892
4893 case Intrinsic::s390_vaq:
4894 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
4895 Op.getOperand(1), Op.getOperand(2));
4896 case Intrinsic::s390_vaccb:
4897 case Intrinsic::s390_vacch:
4898 case Intrinsic::s390_vaccf:
4899 case Intrinsic::s390_vaccg:
4900 case Intrinsic::s390_vaccq:
4901 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
4902 Op.getOperand(1), Op.getOperand(2));
4903 case Intrinsic::s390_vacq:
4904 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
4905 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4906 case Intrinsic::s390_vacccq:
4907 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
4908 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4909
4910 case Intrinsic::s390_vsq:
4911 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
4912 Op.getOperand(1), Op.getOperand(2));
4913 case Intrinsic::s390_vscbib:
4914 case Intrinsic::s390_vscbih:
4915 case Intrinsic::s390_vscbif:
4916 case Intrinsic::s390_vscbig:
4917 case Intrinsic::s390_vscbiq:
4918 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
4919 Op.getOperand(1), Op.getOperand(2));
4920 case Intrinsic::s390_vsbiq:
4921 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
4922 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4923 case Intrinsic::s390_vsbcbiq:
4924 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
4925 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4926 }
4927
4928 return SDValue();
4929}
4930
4931namespace {
4932// Says that SystemZISD operation Opcode can be used to perform the equivalent
4933// of a VPERM with permute vector Bytes. If Opcode takes three operands,
4934// Operand is the constant third operand, otherwise it is the number of
4935// bytes in each element of the result.
4936struct Permute {
4937 unsigned Opcode;
4938 unsigned Operand;
4939 unsigned char Bytes[SystemZ::VectorBytes];
4940};
4941}
4942
4943static const Permute PermuteForms[] = {
4944 // VMRHG
4945 { SystemZISD::MERGE_HIGH, 8,
4946 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4947 // VMRHF
4948 { SystemZISD::MERGE_HIGH, 4,
4949 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4950 // VMRHH
4951 { SystemZISD::MERGE_HIGH, 2,
4952 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4953 // VMRHB
4954 { SystemZISD::MERGE_HIGH, 1,
4955 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4956 // VMRLG
4957 { SystemZISD::MERGE_LOW, 8,
4958 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4959 // VMRLF
4960 { SystemZISD::MERGE_LOW, 4,
4961 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4962 // VMRLH
4963 { SystemZISD::MERGE_LOW, 2,
4964 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4965 // VMRLB
4966 { SystemZISD::MERGE_LOW, 1,
4967 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4968 // VPKG
4969 { SystemZISD::PACK, 4,
4970 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4971 // VPKF
4972 { SystemZISD::PACK, 2,
4973 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4974 // VPKH
4975 { SystemZISD::PACK, 1,
4976 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4977 // VPDI V1, V2, 4 (low half of V1, high half of V2)
4978 { SystemZISD::PERMUTE_DWORDS, 4,
4979 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4980 // VPDI V1, V2, 1 (high half of V1, low half of V2)
4981 { SystemZISD::PERMUTE_DWORDS, 1,
4982 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4983};
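
// How to read the table above: the 16 byte selectors index the 32-byte
// concatenation of the two operands (operand 0 is bytes 0-15, operand 1 is
// bytes 16-31). For example the VMRHB entry interleaves bytes 0-7 of both
// operands, and the two VPDI entries each pick one doubleword from each
// operand.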
4984
4985// Called after matching a vector shuffle against a particular pattern.
4986// Both the original shuffle and the pattern have two vector operands.
4987// OpNos[0] is the operand of the original shuffle that should be used for
4988// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4989// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
4990// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4991// for operands 0 and 1 of the pattern.
4992static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
4993 if (OpNos[0] < 0) {
4994 if (OpNos[1] < 0)
4995 return false;
4996 OpNo0 = OpNo1 = OpNos[1];
4997 } else if (OpNos[1] < 0) {
4998 OpNo0 = OpNo1 = OpNos[0];
4999 } else {
5000 OpNo0 = OpNos[0];
5001 OpNo1 = OpNos[1];
5002 }
5003 return true;
5004}
5005
5006// Bytes is a VPERM-like permute vector, except that -1 is used for
5007// undefined bytes. Return true if the VPERM can be implemented using P.
5008// When returning true set OpNo0 to the VPERM operand that should be
5009// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5010//
5011// For example, if swapping the VPERM operands allows P to match, OpNo0
5012// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5013// operand, but rewriting it to use two duplicated operands allows it to
5014// match P, then OpNo0 and OpNo1 will be the same.
5015static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5016 unsigned &OpNo0, unsigned &OpNo1) {
5017 int OpNos[] = { -1, -1 };
5018 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5019 int Elt = Bytes[I];
5020 if (Elt >= 0) {
5021 // Make sure that the two permute vectors use the same suboperand
5022 // byte number. Only the operand numbers (the high bits) are
5023 // allowed to differ.
5024 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5025 return false;
5026 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5027 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5028 // Make sure that the operand mappings are consistent with previous
5029 // elements.
5030 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5031 return false;
5032 OpNos[ModelOpNo] = RealOpNo;
5033 }
5034 }
5035 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5036}
5037
5038// As above, but search for a matching permute.
5039static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5040 unsigned &OpNo0, unsigned &OpNo1) {
5041 for (auto &P : PermuteForms)
5042 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5043 return &P;
5044 return nullptr;
5045}
5046
5047// Bytes is a VPERM-like permute vector, except that -1 is used for
5048// undefined bytes. This permute is an operand of an outer permute.
5049// See whether redistributing the -1 bytes gives a shuffle that can be
5050// implemented using P. If so, set Transform to a VPERM-like permute vector
5051// that, when applied to the result of P, gives the original permute in Bytes.
5052 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5053 const Permute &P,
5054 SmallVectorImpl<int> &Transform) {
5055 unsigned To = 0;
5056 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5057 int Elt = Bytes[From];
5058 if (Elt < 0)
5059 // Byte number From of the result is undefined.
5060 Transform[From] = -1;
5061 else {
5062 while (P.Bytes[To] != Elt) {
5063 To += 1;
5064 if (To == SystemZ::VectorBytes)
5065 return false;
5066 }
5067 Transform[From] = To;
5068 }
5069 }
5070 return true;
5071}
5072
5073// As above, but search for a matching permute.
5074static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5075 SmallVectorImpl<int> &Transform) {
5076 for (auto &P : PermuteForms)
5077 if (matchDoublePermute(Bytes, P, Transform))
5078 return &P;
5079 return nullptr;
5080}
5081
5082// Convert the mask of the given shuffle op into a byte-level mask,
5083// as if it had type vNi8.
5084static bool getVPermMask(SDValue ShuffleOp,
5085 SmallVectorImpl<int> &Bytes) {
5086 EVT VT = ShuffleOp.getValueType();
5087 unsigned NumElements = VT.getVectorNumElements();
5088 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5089
5090 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5091 Bytes.resize(NumElements * BytesPerElement, -1);
5092 for (unsigned I = 0; I < NumElements; ++I) {
5093 int Index = VSN->getMaskElt(I);
5094 if (Index >= 0)
5095 for (unsigned J = 0; J < BytesPerElement; ++J)
5096 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5097 }
5098 return true;
5099 }
5100 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5101 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5102 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5103 Bytes.resize(NumElements * BytesPerElement, -1);
5104 for (unsigned I = 0; I < NumElements; ++I)
5105 for (unsigned J = 0; J < BytesPerElement; ++J)
5106 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5107 return true;
5108 }
5109 return false;
5110}
5111
5112// Bytes is a VPERM-like permute vector, except that -1 is used for
5113// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5114// the result come from a contiguous sequence of bytes from one input.
5115// Set Base to the selector for the first byte if so.
5116static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5117 unsigned BytesPerElement, int &Base) {
5118 Base = -1;
5119 for (unsigned I = 0; I < BytesPerElement; ++I) {
5120 if (Bytes[Start + I] >= 0) {
5121 unsigned Elem = Bytes[Start + I];
5122 if (Base < 0) {
5123 Base = Elem - I;
5124 // Make sure the bytes would come from one input operand.
5125 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5126 return false;
5127 } else if (unsigned(Base) != Elem - I)
5128 return false;
5129 }
5130 }
5131 return true;
5132}
5133
5134// Bytes is a VPERM-like permute vector, except that -1 is used for
5135// undefined bytes. Return true if it can be performed using VSLDB.
5136// When returning true, set StartIndex to the shift amount and OpNo0
5137// and OpNo1 to the VPERM operands that should be used as the first
5138// and second shift operand respectively.
5139 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5140 unsigned &StartIndex, unsigned &OpNo0,
5141 unsigned &OpNo1) {
5142 int OpNos[] = { -1, -1 };
5143 int Shift = -1;
5144 for (unsigned I = 0; I < 16; ++I) {
5145 int Index = Bytes[I];
5146 if (Index >= 0) {
5147 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5148 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5149 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5150 if (Shift < 0)
5151 Shift = ExpectedShift;
5152 else if (Shift != ExpectedShift)
5153 return false;
5154 // Make sure that the operand mappings are consistent with previous
5155 // elements.
5156 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5157 return false;
5158 OpNos[ModelOpNo] = RealOpNo;
5159 }
5160 }
5161 StartIndex = Shift;
5162 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5163}
5164
5165// Create a node that performs P on operands Op0 and Op1, casting the
5166// operands to the appropriate type. The type of the result is determined by P.
5167static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5168 const Permute &P, SDValue Op0, SDValue Op1) {
5169 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5170 // elements of a PACK are twice as wide as the outputs.
5171 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5172 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5173 P.Operand);
5174 // Cast both operands to the appropriate type.
5175 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5176 SystemZ::VectorBytes / InBytes);
5177 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5178 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5179 SDValue Op;
5180 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5181 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5182 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5183 } else if (P.Opcode == SystemZISD::PACK) {
5184 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5185 SystemZ::VectorBytes / P.Operand);
5186 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5187 } else {
5188 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5189 }
5190 return Op;
5191}
5192
5193static bool isZeroVector(SDValue N) {
5194 if (N->getOpcode() == ISD::BITCAST)
5195 N = N->getOperand(0);
5196 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5197 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5198 return Op->getZExtValue() == 0;
5199 return ISD::isBuildVectorAllZeros(N.getNode());
5200}
5201
5202// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5203static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5204 for (unsigned I = 0; I < Num ; I++)
5205 if (isZeroVector(Ops[I]))
5206 return I;
5207 return UINT32_MAX;
5208}
5209
5210// Bytes is a VPERM-like permute vector, except that -1 is used for
5211// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5212// VSLDB or VPERM.
5213static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5214 SDValue *Ops,
5215 const SmallVectorImpl<int> &Bytes) {
5216 for (unsigned I = 0; I < 2; ++I)
5217 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5218
5219 // First see whether VSLDB can be used.
5220 unsigned StartIndex, OpNo0, OpNo1;
5221 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5222 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5223 Ops[OpNo1],
5224 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5225
5226 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5227 // eliminate a zero vector by reusing any zero index in the permute vector.
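// For example, if Ops[1] is an all-zero vector and Bytes is
// { 16, 0, 1, 2, ... }, ZeroIdx becomes 0 and the VPERM mask becomes
// { 0, 16, 17, 18, ... }; the mask is then used both as the selector and as
// the first source operand, so selector value 0 picks the mask's own leading
// zero byte and the explicit zero vector operand can be dropped.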
5228 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5229 if (ZeroVecIdx != UINT32_MAX) {
5230 bool MaskFirst = true;
5231 int ZeroIdx = -1;
5232 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5233 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5234 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5235 if (OpNo == ZeroVecIdx && I == 0) {
5236 // If the first byte is zero, use mask as first operand.
5237 ZeroIdx = 0;
5238 break;
5239 }
5240 if (OpNo != ZeroVecIdx && Byte == 0) {
5241 // If mask contains a zero, use it by placing that vector first.
5242 ZeroIdx = I + SystemZ::VectorBytes;
5243 MaskFirst = false;
5244 break;
5245 }
5246 }
5247 if (ZeroIdx != -1) {
5248 SDValue IndexNodes[SystemZ::VectorBytes];
5249 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5250 if (Bytes[I] >= 0) {
5251 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5252 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5253 if (OpNo == ZeroVecIdx)
5254 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5255 else {
5256 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5257 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5258 }
5259 } else
5260 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5261 }
5262 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5263 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5264 if (MaskFirst)
5265 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5266 Mask);
5267 else
5268 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5269 Mask);
5270 }
5271 }
5272
5273 SDValue IndexNodes[SystemZ::VectorBytes];
5274 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5275 if (Bytes[I] >= 0)
5276 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5277 else
5278 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5279 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5280 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5281 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5282}
5283
5284namespace {
5285// Describes a general N-operand vector shuffle.
5286struct GeneralShuffle {
5287 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
5288 void addUndef();
5289 bool add(SDValue, unsigned);
5290 SDValue getNode(SelectionDAG &, const SDLoc &);
5291 void tryPrepareForUnpack();
5292 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5293 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5294
5295 // The operands of the shuffle.
5296 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5297
5298 // Index I is -1 if byte I of the result is undefined. Otherwise the
5299 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5300 // Bytes[I] / SystemZ::VectorBytes.
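// For example, Bytes[3] == 21 means that result byte 3 is byte 21 % 16 == 5
// of operand 21 / 16 == 1.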
5301 SmallVector<int, SystemZ::VectorBytes> Bytes;
5302
5303 // The type of the shuffle result.
5304 EVT VT;
5305
5306 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5307 unsigned UnpackFromEltSize;
5308};
5309}
5310
5311// Add an extra undefined element to the shuffle.
5312void GeneralShuffle::addUndef() {
5313 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5314 for (unsigned I = 0; I < BytesPerElement; ++I)
5315 Bytes.push_back(-1);
5316}
5317
5318// Add an extra element to the shuffle, taking it from element Elem of Op.
5319// A null Op indicates a vector input whose value will be calculated later;
5320// there is at most one such input per shuffle and it always has the same
5321// type as the result. Aborts and returns false if the source vector elements
5322// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
5323// LLVM they become implicitly extended, but this is rare and not optimized.
5324bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5325 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5326
5327 // The source vector can have wider elements than the result,
5328 // either through an explicit TRUNCATE or because of type legalization.
5329 // We want the least significant part.
5330 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5331 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5332
5333 // Return false if the source elements are smaller than their destination
5334 // elements.
5335 if (FromBytesPerElement < BytesPerElement)
5336 return false;
5337
5338 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5339 (FromBytesPerElement - BytesPerElement));
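// For example, taking element 1 of a v4i32 operand for a v8i16 result gives
// Byte = (1 * 4) % 16 + (4 - 2) = 6, i.e. the two-byte piece at bytes 6-7,
// which is the least significant half of that i32 element on this big-endian
// target.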
5340
5341 // Look through things like shuffles and bitcasts.
5342 while (Op.getNode()) {
5343 if (Op.getOpcode() == ISD::BITCAST)
5344 Op = Op.getOperand(0);
5345 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5346 // See whether the bytes we need come from a contiguous part of one
5347 // operand.
5348 SmallVector<int, SystemZ::VectorBytes> OpBytes;
5349 if (!getVPermMask(Op, OpBytes))
5350 break;
5351 int NewByte;
5352 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5353 break;
5354 if (NewByte < 0) {
5355 addUndef();
5356 return true;
5357 }
5358 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
5359 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
5360 } else if (Op.isUndef()) {
5361 addUndef();
5362 return true;
5363 } else
5364 break;
5365 }
5366
5367 // Make sure that the source of the extraction is in Ops.
5368 unsigned OpNo = 0;
5369 for (; OpNo < Ops.size(); ++OpNo)
5370 if (Ops[OpNo] == Op)
5371 break;
5372 if (OpNo == Ops.size())
5373 Ops.push_back(Op);
5374
5375 // Add the element to Bytes.
5376 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
5377 for (unsigned I = 0; I < BytesPerElement; ++I)
5378 Bytes.push_back(Base + I);
5379
5380 return true;
5381}
5382
5383// Return SDNodes for the completed shuffle.
5384SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
5385 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
5386
5387 if (Ops.size() == 0)
5388 return DAG.getUNDEF(VT);
5389
5390 // Use a single unpack if possible as the last operation.
5391 tryPrepareForUnpack();
5392
5393 // Make sure that there are at least two shuffle operands.
5394 if (Ops.size() == 1)
5395 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
5396
5397 // Create a tree of shuffles, deferring root node until after the loop.
5398 // Try to redistribute the undefined elements of non-root nodes so that
5399 // the non-root shuffles match something like a pack or merge, then adjust
5400 // the parent node's permute vector to compensate for the new order.
5401 // Among other things, this copes with vectors like <2 x i16> that were
5402 // padded with undefined elements during type legalization.
5403 //
5404 // In the best case this redistribution will lead to the whole tree
5405 // using packs and merges. It should rarely be a loss in other cases.
5406 unsigned Stride = 1;
5407 for (; Stride * 2 < Ops.size(); Stride *= 2) {
5408 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
5409 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
5410
5411 // Create a mask for just these two operands.
5412 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
5413 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5414 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
5415 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
5416 if (OpNo == I)
5417 NewBytes[J] = Byte;
5418 else if (OpNo == I + Stride)
5419 NewBytes[J] = SystemZ::VectorBytes + Byte;
5420 else
5421 NewBytes[J] = -1;
5422 }
5423 // See if it would be better to reorganize NewMask to avoid using VPERM.
5424 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
5425 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
5426 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
5427 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
5428 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5429 if (NewBytes[J] >= 0) {
5430 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
5431 "Invalid double permute");
5432 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
5433 } else
5434 assert(NewBytesMap[J] < 0 && "Invalid double permute");
5435 }
5436 } else {
5437 // Just use NewBytes on the operands.
5438 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
5439 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
5440 if (NewBytes[J] >= 0)
5441 Bytes[J] = I * SystemZ::VectorBytes + J;
5442 }
5443 }
5444 }
5445
5446 // Now we just have 2 inputs. Put the second operand in Ops[1].
5447 if (Stride > 1) {
5448 Ops[1] = Ops[Stride];
5449 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5450 if (Bytes[I] >= int(SystemZ::VectorBytes))
5451 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
5452 }
5453
5454 // Look for an instruction that can do the permute without resorting
5455 // to VPERM.
5456 unsigned OpNo0, OpNo1;
5457 SDValue Op;
5458 if (unpackWasPrepared() && Ops[1].isUndef())
5459 Op = Ops[0];
5460 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
5461 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
5462 else
5463 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
5464
5465 Op = insertUnpackIfPrepared(DAG, DL, Op);
5466
5467 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5468}
5469
5470#ifndef NDEBUG
5471static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
5472 dbgs() << Msg.c_str() << " { ";
5473 for (unsigned i = 0; i < Bytes.size(); i++)
5474 dbgs() << Bytes[i] << " ";
5475 dbgs() << "}\n";
5476}
5477#endif
5478
5479// If the Bytes vector matches an unpack operation, prepare to do the unpack
5480// after all else by removing the zero vector and the effect of the unpack on
5481// Bytes.
5482void GeneralShuffle::tryPrepareForUnpack() {
5483 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
5484 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
5485 return;
5486
5487 // Only do this if removing the zero vector reduces the depth, otherwise
5488 // the critical path will increase with the final unpack.
5489 if (Ops.size() > 2 &&
5490 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
5491 return;
5492
5493 // Find an unpack that would allow removing the zero vector from Ops.
5494 UnpackFromEltSize = 1;
5495 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
5496 bool MatchUnpack = true;
5497 SmallVector<int, SystemZ::VectorBytes / 2> SrcBytes;
5498 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
5499 unsigned ToEltSize = UnpackFromEltSize * 2;
5500 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
5501 if (!IsZextByte)
5502 SrcBytes.push_back(Bytes[Elt]);
5503 if (Bytes[Elt] != -1) {
5504 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
5505 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
5506 MatchUnpack = false;
5507 break;
5508 }
5509 }
5510 }
5511 if (MatchUnpack) {
5512 if (Ops.size() == 2) {
5513 // Don't use unpack if a single source operand needs rearrangement.
5514 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
5515 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
5516 UnpackFromEltSize = UINT_MAX;
5517 return;
5518 }
5519 }
5520 break;
5521 }
5522 }
5523 if (UnpackFromEltSize > 4)
5524 return;
5525
5526 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
5527 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
5528 << ".\n";
5529 dumpBytes(Bytes, "Original Bytes vector:"););
5530
5531 // Apply the unpack in reverse to the Bytes array.
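// For example, with UnpackFromEltSize == 1 this keeps the old entries at odd
// positions 1, 3, ..., 15 (the non-zero byte of each two-byte element) in
// positions 0-7 and fills positions 8-15 with -1; the final UNPACKL_HIGH then
// re-inserts the zero bytes.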
5532 unsigned B = 0;
5533 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
5534 Elt += UnpackFromEltSize;
5535 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
5536 Bytes[B] = Bytes[Elt];
5537 }
5538 while (B < SystemZ::VectorBytes)
5539 Bytes[B++] = -1;
5540
5541 // Remove the zero vector from Ops
5542 Ops.erase(&Ops[ZeroVecOpNo]);
5543 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5544 if (Bytes[I] >= 0) {
5545 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5546 if (OpNo > ZeroVecOpNo)
5547 Bytes[I] -= SystemZ::VectorBytes;
5548 }
5549
5550 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
5551 dbgs() << "\n";);
5552}
5553
5554SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
5555 const SDLoc &DL,
5556 SDValue Op) {
5557 if (!unpackWasPrepared())
5558 return Op;
5559 unsigned InBits = UnpackFromEltSize * 8;
5560 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
5561 SystemZ::VectorBits / InBits);
5562 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
5563 unsigned OutBits = InBits * 2;
5564 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
5565 SystemZ::VectorBits / OutBits);
5566 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
5567}
5568
5569// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
5570static bool isScalarToVector(SDValue Op) {
5571 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
5572 if (!Op.getOperand(I).isUndef())
5573 return false;
5574 return true;
5575}
5576
5577// Return a vector of type VT that contains Value in the first element.
5578// The other elements don't matter.
5579static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5580 SDValue Value) {
5581 // If we have a constant, replicate it to all elements and let the
5582 // BUILD_VECTOR lowering take care of it.
5583 if (Value.getOpcode() == ISD::Constant ||
5584 Value.getOpcode() == ISD::ConstantFP) {
5585 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
5586 return DAG.getBuildVector(VT, DL, Ops);
5587 }
5588 if (Value.isUndef())
5589 return DAG.getUNDEF(VT);
5590 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
5591}
5592
5593// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
5594// element 1. Used for cases in which replication is cheap.
5595static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5596 SDValue Op0, SDValue Op1) {
5597 if (Op0.isUndef()) {
5598 if (Op1.isUndef())
5599 return DAG.getUNDEF(VT);
5600 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
5601 }
5602 if (Op1.isUndef())
5603 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
5604 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
5605 buildScalarToVector(DAG, DL, VT, Op0),
5606 buildScalarToVector(DAG, DL, VT, Op1));
5607}
5608
5609// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
5610// vector for them.
5611static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
5612 SDValue Op1) {
5613 if (Op0.isUndef() && Op1.isUndef())
5614 return DAG.getUNDEF(MVT::v2i64);
5615 // If one of the two inputs is undefined then replicate the other one,
5616 // in order to avoid using another register unnecessarily.
5617 if (Op0.isUndef())
5618 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5619 else if (Op1.isUndef())
5620 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5621 else {
5622 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5623 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5624 }
5625 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
5626}
5627
5628// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
5629// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
5630// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
5631// would benefit from this representation and return it if so.
5632static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
5633 BuildVectorSDNode *BVN) {
5634 EVT VT = BVN->getValueType(0);
5635 unsigned NumElements = VT.getVectorNumElements();
5636
5637 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
5638 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
5639 // need a BUILD_VECTOR, add an additional placeholder operand for that
5640 // BUILD_VECTOR and store its operands in ResidueOps.
5641 GeneralShuffle GS(VT);
5642 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
5643 bool FoundOne = false;
5644 for (unsigned I = 0; I < NumElements; ++I) {
5645 SDValue Op = BVN->getOperand(I);
5646 if (Op.getOpcode() == ISD::TRUNCATE)
5647 Op = Op.getOperand(0);
5648 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5649 Op.getOperand(1).getOpcode() == ISD::Constant) {
5650 unsigned Elem = Op.getConstantOperandVal(1);
5651 if (!GS.add(Op.getOperand(0), Elem))
5652 return SDValue();
5653 FoundOne = true;
5654 } else if (Op.isUndef()) {
5655 GS.addUndef();
5656 } else {
5657 if (!GS.add(SDValue(), ResidueOps.size()))
5658 return SDValue();
5659 ResidueOps.push_back(BVN->getOperand(I));
5660 }
5661 }
5662
5663 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
5664 if (!FoundOne)
5665 return SDValue();
5666
5667 // Create the BUILD_VECTOR for the remaining elements, if any.
5668 if (!ResidueOps.empty()) {
5669 while (ResidueOps.size() < NumElements)
5670 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
5671 for (auto &Op : GS.Ops) {
5672 if (!Op.getNode()) {
5673 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
5674 break;
5675 }
5676 }
5677 }
5678 return GS.getNode(DAG, SDLoc(BVN));
5679}
5680
5681bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
5682 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
5683 return true;
5684 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
5685 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
5686 return true;
5687 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
5688 return true;
5689 return false;
5690}
5691
5692// Combine GPR scalar values Elems into a vector of type VT.
5693SDValue
5694SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5695 SmallVectorImpl<SDValue> &Elems) const {
5696 // See whether there is a single replicated value.
5697 SDValue Single;
5698 unsigned int NumElements = Elems.size();
5699 unsigned int Count = 0;
5700 for (auto Elem : Elems) {
5701 if (!Elem.isUndef()) {
5702 if (!Single.getNode())
5703 Single = Elem;
5704 else if (Elem != Single) {
5705 Single = SDValue();
5706 break;
5707 }
5708 Count += 1;
5709 }
5710 }
5711 // There are three cases here:
5712 //
5713 // - if the only defined element is a loaded one, the best sequence
5714 // is a replicating load.
5715 //
5716 // - otherwise, if the only defined element is an i64 value, we will
5717 // end up with the same VLVGP sequence regardless of whether we short-cut
5718 // for replication or fall through to the later code.
5719 //
5720 // - otherwise, if the only defined element is an i32 or smaller value,
5721 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
5722 // This is only a win if the single defined element is used more than once.
5723 // In other cases we're better off using a single VLVGx.
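// For example, <X, X, undef, undef> for a non-loaded GPR value X is
// replicated here (Count == 2), while <X, undef, undef, undef> falls through
// to the code below.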
5724 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
5725 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
5726
5727 // If all elements are loads, use VLREP/VLEs (below).
5728 bool AllLoads = true;
5729 for (auto Elem : Elems)
5730 if (!isVectorElementLoad(Elem)) {
5731 AllLoads = false;
5732 break;
5733 }
5734
5735 // The best way of building a v2i64 from two i64s is to use VLVGP.
5736 if (VT == MVT::v2i64 && !AllLoads)
5737 return joinDwords(DAG, DL, Elems[0], Elems[1]);
5738
5739 // Use a 64-bit merge high to combine two doubles.
5740 if (VT == MVT::v2f64 && !AllLoads)
5741 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5742
5743 // Build v4f32 values directly from the FPRs:
5744 //
5745 //   <Axxx> <Bxxx> <Cxxx> <Dxxx>
5746 //      V       V              VMRHF
5747 //    <ABxx>  <CDxx>
5748 //        V   V                VMRHG
5749 //       <ABCD>
5750 if (VT == MVT::v4f32 && !AllLoads) {
5751 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5752 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
5753 // Avoid unnecessary undefs by reusing the other operand.
5754 if (Op01.isUndef())
5755 Op01 = Op23;
5756 else if (Op23.isUndef())
5757 Op23 = Op01;
5758 // Merging identical replications is a no-op.
5759 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
5760 return Op01;
5761 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
5762 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
5763 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
5764 DL, MVT::v2i64, Op01, Op23);
5765 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5766 }
5767
5768 // Collect the constant terms.
5769 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
5770 SmallVector<bool, 16> Done(NumElements, false);
5771
5772 unsigned NumConstants = 0;
5773 for (unsigned I = 0; I < NumElements; ++I) {
5774 SDValue Elem = Elems[I];
5775 if (Elem.getOpcode() == ISD::Constant ||
5776 Elem.getOpcode() == ISD::ConstantFP) {
5777 NumConstants += 1;
5778 Constants[I] = Elem;
5779 Done[I] = true;
5780 }
5781 }
5782 // If there was at least one constant, fill in the other elements of
5783 // Constants with undefs to get a full vector constant and use that
5784 // as the starting point.
5785 SDValue Result;
5786 SDValue ReplicatedVal;
5787 if (NumConstants > 0) {
5788 for (unsigned I = 0; I < NumElements; ++I)
5789 if (!Constants[I].getNode())
5790 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
5791 Result = DAG.getBuildVector(VT, DL, Constants);
5792 } else {
5793 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
5794 // avoid a false dependency on any previous contents of the vector
5795 // register.
5796
5797 // Use a VLREP if at least one element is a load. Make sure to replicate
5798 // the load with the most elements having its value.
5799 std::map<const SDNode*, unsigned> UseCounts;
5800 SDNode *LoadMaxUses = nullptr;
5801 for (unsigned I = 0; I < NumElements; ++I)
5802 if (isVectorElementLoad(Elems[I])) {
5803 SDNode *Ld = Elems[I].getNode();
5804 UseCounts[Ld]++;
5805 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
5806 LoadMaxUses = Ld;
5807 }
5808 if (LoadMaxUses != nullptr) {
5809 ReplicatedVal = SDValue(LoadMaxUses, 0);
5810 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
5811 } else {
5812 // Try to use VLVGP.
5813 unsigned I1 = NumElements / 2 - 1;
5814 unsigned I2 = NumElements - 1;
5815 bool Def1 = !Elems[I1].isUndef();
5816 bool Def2 = !Elems[I2].isUndef();
5817 if (Def1 || Def2) {
5818 SDValue Elem1 = Elems[Def1 ? I1 : I2];
5819 SDValue Elem2 = Elems[Def2 ? I2 : I1];
5820 Result = DAG.getNode(ISD::BITCAST, DL, VT,
5821 joinDwords(DAG, DL, Elem1, Elem2));
5822 Done[I1] = true;
5823 Done[I2] = true;
5824 } else
5825 Result = DAG.getUNDEF(VT);
5826 }
5827 }
5828
5829 // Use VLVGx to insert the other elements.
5830 for (unsigned I = 0; I < NumElements; ++I)
5831 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
5832 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
5833 DAG.getConstant(I, DL, MVT::i32));
5834 return Result;
5835}
5836
5837SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
5838 SelectionDAG &DAG) const {
5839 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
5840 SDLoc DL(Op);
5841 EVT VT = Op.getValueType();
5842
5843 if (BVN->isConstant()) {
5844 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
5845 return Op;
5846
5847 // Fall back to loading it from memory.
5848 return SDValue();
5849 }
5850
5851 // See if we should use shuffles to construct the vector from other vectors.
5852 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
5853 return Res;
5854
5855 // Detect SCALAR_TO_VECTOR conversions.
5856 if (isScalarToVector(Op))
5857 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
5858
5859 // Otherwise use buildVector to build the vector up from GPRs.
5860 unsigned NumElements = Op.getNumOperands();
5861 SmallVector<SDValue, 16> Ops(NumElements);
5862 for (unsigned I = 0; I < NumElements; ++I)
5863 Ops[I] = Op.getOperand(I);
5864 return buildVector(DAG, DL, VT, Ops);
5865}
5866
5867SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
5868 SelectionDAG &DAG) const {
5869 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
5870 SDLoc DL(Op);
5871 EVT VT = Op.getValueType();
5872 unsigned NumElements = VT.getVectorNumElements();
5873
5874 if (VSN->isSplat()) {
5875 SDValue Op0 = Op.getOperand(0);
5876 unsigned Index = VSN->getSplatIndex();
5878 "Splat index should be defined and in first operand");
5879 // See whether the value we're splatting is directly available as a scalar.
5880 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5881 Op0.getOpcode() == ISD::BUILD_VECTOR)
5882 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
5883 // Otherwise keep it as a vector-to-vector operation.
5884 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
5885 DAG.getTargetConstant(Index, DL, MVT::i32));
5886 }
5887
5888 GeneralShuffle GS(VT);
5889 for (unsigned I = 0; I < NumElements; ++I) {
5890 int Elt = VSN->getMaskElt(I);
5891 if (Elt < 0)
5892 GS.addUndef();
5893 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
5894 unsigned(Elt) % NumElements))
5895 return SDValue();
5896 }
5897 return GS.getNode(DAG, SDLoc(VSN));
5898}
5899
5900SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
5901 SelectionDAG &DAG) const {
5902 SDLoc DL(Op);
5903 // Just insert the scalar into element 0 of an undefined vector.
5904 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
5905 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
5906 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
5907}
5908
5909SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
5910 SelectionDAG &DAG) const {
5911 // Handle insertions of floating-point values.
5912 SDLoc DL(Op);
5913 SDValue Op0 = Op.getOperand(0);
5914 SDValue Op1 = Op.getOperand(1);
5915 SDValue Op2 = Op.getOperand(2);
5916 EVT VT = Op.getValueType();
5917
5918 // Insertions into constant indices of a v2f64 can be done using VPDI.
5919 // However, if the inserted value is a bitcast or a constant then it's
5920 // better to use GPRs, as below.
5921 if (VT == MVT::v2f64 &&
5922 Op1.getOpcode() != ISD::BITCAST &&
5923 Op1.getOpcode() != ISD::ConstantFP &&
5924 Op2.getOpcode() == ISD::Constant) {
5925 uint64_t Index = Op2->getAsZExtVal();
5926 unsigned Mask = VT.getVectorNumElements() - 1;
5927 if (Index <= Mask)
5928 return Op;
5929 }
5930
5931 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
5932 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
5933 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
5934 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
5935 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
5936 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
5937 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5938}
5939
5940SDValue
5941SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
5942 SelectionDAG &DAG) const {
5943 // Handle extractions of floating-point values.
5944 SDLoc DL(Op);
5945 SDValue Op0 = Op.getOperand(0);
5946 SDValue Op1 = Op.getOperand(1);
5947 EVT VT = Op.getValueType();
5948 EVT VecVT = Op0.getValueType();
5949
5950 // Extractions of constant indices can be done directly.
5951 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
5952 uint64_t Index = CIndexN->getZExtValue();
5953 unsigned Mask = VecVT.getVectorNumElements() - 1;
5954 if (Index <= Mask)
5955 return Op;
5956 }
5957
5958 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
5959 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
5960 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
5961 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
5962 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
5963 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5964}
5965
5966SDValue SystemZTargetLowering::
5967lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5968 SDValue PackedOp = Op.getOperand(0);
5969 EVT OutVT = Op.getValueType();
5970 EVT InVT = PackedOp.getValueType();
5971 unsigned ToBits = OutVT.getScalarSizeInBits();
5972 unsigned FromBits = InVT.getScalarSizeInBits();
5973 do {
5974 FromBits *= 2;
5975 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
5976 SystemZ::VectorBits / FromBits);
5977 PackedOp =
5978 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
5979 } while (FromBits != ToBits);
5980 return PackedOp;
5981}
5982
5983// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
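// For example, extending the first four elements of a v16i8 to v4i32 uses the
// shuffle mask { 16, 17, 18, 0, 19, 20, 21, 1, 22, 23, 24, 2, 25, 26, 27, 3 }:
// three bytes from the zero vector followed by one source byte per output
// element.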
5984SDValue SystemZTargetLowering::
5985lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5986 SDValue PackedOp = Op.getOperand(0);
5987 SDLoc DL(Op);
5988 EVT OutVT = Op.getValueType();
5989 EVT InVT = PackedOp.getValueType();
5990 unsigned InNumElts = InVT.getVectorNumElements();
5991 unsigned OutNumElts = OutVT.getVectorNumElements();
5992 unsigned NumInPerOut = InNumElts / OutNumElts;
5993
5994 SDValue ZeroVec =
5995 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
5996
5997 SmallVector<int, 16> Mask(InNumElts);
5998 unsigned ZeroVecElt = InNumElts;
5999 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
6000 unsigned MaskElt = PackedElt * NumInPerOut;
6001 unsigned End = MaskElt + NumInPerOut - 1;
6002 for (; MaskElt < End; MaskElt++)
6003 Mask[MaskElt] = ZeroVecElt++;
6004 Mask[MaskElt] = PackedElt;
6005 }
6006 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6007 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6008}
6009
6010SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6011 unsigned ByScalar) const {
6012 // Look for cases where a vector shift can use the *_BY_SCALAR form.
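// For example, (shl v4i32 X, (splat 3)) becomes (VSHL_BY_SCALAR X, 3), with
// the splatted shift amount carried in a single i32 operand.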
6013 SDValue Op0 = Op.getOperand(0);
6014 SDValue Op1 = Op.getOperand(1);
6015 SDLoc DL(Op);
6016 EVT VT = Op.getValueType();
6017 unsigned ElemBitSize = VT.getScalarSizeInBits();
6018
6019 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6020 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6021 APInt SplatBits, SplatUndef;
6022 unsigned SplatBitSize;
6023 bool HasAnyUndefs;
6024 // Check for constant splats. Use ElemBitSize as the minimum element
6025 // width and reject splats that need wider elements.
6026 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6027 ElemBitSize, true) &&
6028 SplatBitSize == ElemBitSize) {
6029 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6030 DL, MVT::i32);
6031 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6032 }
6033 // Check for variable splats.
6034 BitVector UndefElements;
6035 SDValue Splat = BVN->getSplatValue(&UndefElements);
6036 if (Splat) {
6037 // Since i32 is the smallest legal type, we either need a no-op
6038 // or a truncation.
6039 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6040 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6041 }
6042 }
6043
6044 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6045 // and the shift amount is directly available in a GPR.
6046 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6047 if (VSN->isSplat()) {
6048 SDValue VSNOp0 = VSN->getOperand(0);
6049 unsigned Index = VSN->getSplatIndex();
6051 "Splat index should be defined and in first operand");
6052 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6053 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6054 // Since i32 is the smallest legal type, we either need a no-op
6055 // or a truncation.
6056 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6057 VSNOp0.getOperand(Index));
6058 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6059 }
6060 }
6061 }
6062
6063 // Otherwise just treat the current form as legal.
6064 return Op;
6065}
6066
6067SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
6068 SelectionDAG &DAG) const {
6069 SDLoc DL(Op);
6070 MVT ResultVT = Op.getSimpleValueType();
6071 SDValue Arg = Op.getOperand(0);
6072 unsigned Check = Op.getConstantOperandVal(1);
6073
6074 unsigned TDCMask = 0;
6075 if (Check & fcSNan)
6076 TDCMask |= SystemZ::TDCMASK_NAN_SIGNALLING;
6077 if (Check & fcQNan)
6078 TDCMask |= SystemZ::TDCMASK_NAN_QUIET;
6079 if (Check & fcPosInf)
6080 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
6081 if (Check & fcNegInf)
6082 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
6083 if (Check & fcPosNormal)
6084 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
6085 if (Check & fcNegNormal)
6086 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
6087 if (Check & fcPosSubnormal)
6088 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
6089 if (Check & fcNegSubnormal)
6090 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
6091 if (Check & fcPosZero)
6092 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
6093 if (Check & fcNegZero)
6094 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
6095 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
6096
6097 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
6098 return getCCResult(DAG, Intr);
6099}
6100
6101SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
6102 SelectionDAG &DAG) const {
6103 SDLoc DL(Op);
6104 SDValue Chain = Op.getOperand(0);
6105
6106 // STCKF only supports a memory operand, so we have to use a temporary.
6107 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
6108 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6109 MachinePointerInfo MPI =
6110 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
6111
6112 // Use STCKF to store the TOD clock into the temporary.
6113 SDValue StoreOps[] = {Chain, StackPtr};
6114 Chain = DAG.getMemIntrinsicNode(
6115 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
6116 MPI, MaybeAlign(), MachineMemOperand::MOStore);
6117
6118 // And read it back from there.
6119 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
6120}
6121
6122SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
6123 SelectionDAG &DAG) const {
6124 switch (Op.getOpcode()) {
6125 case ISD::FRAMEADDR:
6126 return lowerFRAMEADDR(Op, DAG);
6127 case ISD::RETURNADDR:
6128 return lowerRETURNADDR(Op, DAG);
6129 case ISD::BR_CC:
6130 return lowerBR_CC(Op, DAG);
6131 case ISD::SELECT_CC:
6132 return lowerSELECT_CC(Op, DAG);
6133 case ISD::SETCC:
6134 return lowerSETCC(Op, DAG);
6135 case ISD::STRICT_FSETCC:
6136 return lowerSTRICT_FSETCC(Op, DAG, false);
6137 case ISD::STRICT_FSETCCS:
6138 return lowerSTRICT_FSETCC(Op, DAG, true);
6139 case ISD::GlobalAddress:
6140 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
6141 case ISD::GlobalTLSAddress:
6142 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
6143 case ISD::BlockAddress:
6144 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
6145 case ISD::JumpTable:
6146 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
6147 case ISD::ConstantPool:
6148 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
6149 case ISD::BITCAST:
6150 return lowerBITCAST(Op, DAG);
6151 case ISD::VASTART:
6152 return lowerVASTART(Op, DAG);
6153 case ISD::VACOPY:
6154 return lowerVACOPY(Op, DAG);
6155 case ISD::DYNAMIC_STACKALLOC:
6156 return lowerDYNAMIC_STACKALLOC(Op, DAG);
6157 case ISD::GET_DYNAMIC_AREA_OFFSET:
6158 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
6159 case ISD::SMUL_LOHI:
6160 return lowerSMUL_LOHI(Op, DAG);
6161 case ISD::UMUL_LOHI:
6162 return lowerUMUL_LOHI(Op, DAG);
6163 case ISD::SDIVREM:
6164 return lowerSDIVREM(Op, DAG);
6165 case ISD::UDIVREM:
6166 return lowerUDIVREM(Op, DAG);
6167 case ISD::SADDO:
6168 case ISD::SSUBO:
6169 case ISD::UADDO:
6170 case ISD::USUBO:
6171 return lowerXALUO(Op, DAG);
6172 case ISD::UADDO_CARRY:
6173 case ISD::USUBO_CARRY:
6174 return lowerUADDSUBO_CARRY(Op, DAG);
6175 case ISD::OR:
6176 return lowerOR(Op, DAG);
6177 case ISD::CTPOP:
6178 return lowerCTPOP(Op, DAG);
6179 case ISD::VECREDUCE_ADD:
6180 return lowerVECREDUCE_ADD(Op, DAG);
6181 case ISD::ATOMIC_FENCE:
6182 return lowerATOMIC_FENCE(Op, DAG);
6183 case ISD::ATOMIC_SWAP:
6184 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
6185 case ISD::ATOMIC_STORE:
6186 case ISD::ATOMIC_LOAD:
6187 return lowerATOMIC_LDST_I128(Op, DAG);
6188 case ISD::ATOMIC_LOAD_ADD:
6189 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
6190 case ISD::ATOMIC_LOAD_SUB:
6191 return lowerATOMIC_LOAD_SUB(Op, DAG);
6192 case ISD::ATOMIC_LOAD_AND:
6193 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
6194 case ISD::ATOMIC_LOAD_OR:
6195 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
6196 case ISD::ATOMIC_LOAD_XOR:
6197 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
6198 case ISD::ATOMIC_LOAD_NAND:
6199 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
6200 case ISD::ATOMIC_LOAD_MIN:
6201 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
6202 case ISD::ATOMIC_LOAD_MAX:
6203 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
6204 case ISD::ATOMIC_LOAD_UMIN:
6205 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
6206 case ISD::ATOMIC_LOAD_UMAX:
6207 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
6208 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
6209 return lowerATOMIC_CMP_SWAP(Op, DAG);
6210 case ISD::STACKSAVE:
6211 return lowerSTACKSAVE(Op, DAG);
6212 case ISD::STACKRESTORE:
6213 return lowerSTACKRESTORE(Op, DAG);
6214 case ISD::PREFETCH:
6215 return lowerPREFETCH(Op, DAG);
6216 case ISD::INTRINSIC_W_CHAIN:
6217 return lowerINTRINSIC_W_CHAIN(Op, DAG);
6218 case ISD::INTRINSIC_WO_CHAIN:
6219 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
6220 case ISD::BUILD_VECTOR:
6221 return lowerBUILD_VECTOR(Op, DAG);
6222 case ISD::VECTOR_SHUFFLE:
6223 return lowerVECTOR_SHUFFLE(Op, DAG);
6224 case ISD::SCALAR_TO_VECTOR:
6225 return lowerSCALAR_TO_VECTOR(Op, DAG);
6226 case ISD::INSERT_VECTOR_ELT:
6227 return lowerINSERT_VECTOR_ELT(Op, DAG);
6228 case ISD::EXTRACT_VECTOR_ELT:
6229 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6230 case ISD::SIGN_EXTEND_VECTOR_INREG:
6231 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
6232 case ISD::ZERO_EXTEND_VECTOR_INREG:
6233 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
6234 case ISD::SHL:
6235 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
6236 case ISD::SRL:
6237 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
6238 case ISD::SRA:
6239 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
6240 case ISD::ROTL:
6241 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
6242 case ISD::IS_FPCLASS:
6243 return lowerIS_FPCLASS(Op, DAG);
6244 case ISD::GET_ROUNDING:
6245 return lowerGET_ROUNDING(Op, DAG);
6246 case ISD::READCYCLECOUNTER:
6247 return lowerREADCYCLECOUNTER(Op, DAG);
6248 default:
6249 llvm_unreachable("Unexpected node to lower");
6250 }
6251}
6252
6253static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
6254 const SDLoc &SL) {
6255 // If i128 is legal, just use a normal bitcast.
6256 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
6257 return DAG.getBitcast(MVT::f128, Src);
6258
6259 // Otherwise, f128 must live in FP128, so do a partwise move.
6260 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
6261 &SystemZ::FP128BitRegClass);
6262
6263 SDValue Hi, Lo;
6264 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
6265
6266 Hi = DAG.getBitcast(MVT::f64, Hi);
6267 Lo = DAG.getBitcast(MVT::f64, Lo);
6268
6269 SDNode *Pair = DAG.getMachineNode(
6270 SystemZ::REG_SEQUENCE, SL, MVT::f128,
6271 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
6272 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
6273 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
6274 return SDValue(Pair, 0);
6275}
6276
6277static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
6278 const SDLoc &SL) {
6279 // If i128 is legal, just use a normal bitcast.
6280 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
6281 return DAG.getBitcast(MVT::i128, Src);
6282
6283 // Otherwise, f128 must live in FP128, so do a partwise move.
6284 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
6285 &SystemZ::FP128BitRegClass);
6286
6287 SDValue LoFP =
6288 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
6289 SDValue HiFP =
6290 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
6291 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
6292 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
6293
6294 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
6295}
6296
6297// Lower operations with invalid operand or result types (currently used
6298// only for 128-bit integer types).
6299void
6300SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
6301 SmallVectorImpl<SDValue> &Results,
6302 SelectionDAG &DAG) const {
6303 switch (N->getOpcode()) {
6304 case ISD::ATOMIC_LOAD: {
6305 SDLoc DL(N);
6306 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
6307 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
6308 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6309 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
6310 DL, Tys, Ops, MVT::i128, MMO);
6311
6312 SDValue Lowered = lowerGR128ToI128(DAG, Res);
6313 if (N->getValueType(0) == MVT::f128)
6314 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
6315 Results.push_back(Lowered);
6316 Results.push_back(Res.getValue(1));
6317 break;
6318 }
6319 case ISD::ATOMIC_STORE: {
6320 SDLoc DL(N);
6321 SDVTList Tys = DAG.getVTList(MVT::Other);
6322 SDValue Val = N->getOperand(1);
6323 if (Val.getValueType() == MVT::f128)
6324 Val = expandBitCastF128ToI128(DAG, Val, DL);
6325 Val = lowerI128ToGR128(DAG, Val);
6326
6327 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
6328 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6329 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
6330 DL, Tys, Ops, MVT::i128, MMO);
6331 // We have to enforce sequential consistency by performing a
6332 // serialization operation after the store.
6333 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
6334 AtomicOrdering::SequentiallyConsistent)
6335 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
6336 MVT::Other, Res), 0);
6337 Results.push_back(Res);
6338 break;
6339 }
6340 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
6341 SDLoc DL(N);
6342 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
6343 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
6344 lowerI128ToGR128(DAG, N->getOperand(2)),
6345 lowerI128ToGR128(DAG, N->getOperand(3)) };
6346 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6347 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
6348 DL, Tys, Ops, MVT::i128, MMO);
6349 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
6350 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
6351 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
6352 Results.push_back(lowerGR128ToI128(DAG, Res));
6353 Results.push_back(Success);
6354 Results.push_back(Res.getValue(2));
6355 break;
6356 }
6357 case ISD::BITCAST: {
6358 SDValue Src = N->getOperand(0);
6359 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
6360 !useSoftFloat()) {
6361 SDLoc DL(N);
6362 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
6363 }
6364 break;
6365 }
6366 default:
6367 llvm_unreachable("Unexpected node to lower");
6368 }
6369}
6370
6371void
6372SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
6373 SmallVectorImpl<SDValue> &Results,
6374 SelectionDAG &DAG) const {
6375 return LowerOperationWrapper(N, Results, DAG);
6376}
6377
6378const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
6379#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
6380 switch ((SystemZISD::NodeType)Opcode) {
6381 case SystemZISD::FIRST_NUMBER: break;
6382 OPCODE(RET_GLUE);
6383 OPCODE(CALL);
6384 OPCODE(SIBCALL);
6385 OPCODE(TLS_GDCALL);
6386 OPCODE(TLS_LDCALL);
6387 OPCODE(PCREL_WRAPPER);
6388 OPCODE(PCREL_OFFSET);
6389 OPCODE(ICMP);
6390 OPCODE(FCMP);
6391 OPCODE(STRICT_FCMP);
6392 OPCODE(STRICT_FCMPS);
6393 OPCODE(TM);
6394 OPCODE(BR_CCMASK);
6395 OPCODE(SELECT_CCMASK);
6396 OPCODE(ADJDYNALLOC);
6397 OPCODE(PROBED_ALLOCA);
6398 OPCODE(POPCNT);
6399 OPCODE(SMUL_LOHI);
6400 OPCODE(UMUL_LOHI);
6401 OPCODE(SDIVREM);
6402 OPCODE(UDIVREM);
6403 OPCODE(SADDO);
6404 OPCODE(SSUBO);
6405 OPCODE(UADDO);
6406 OPCODE(USUBO);
6407 OPCODE(ADDCARRY);
6408 OPCODE(SUBCARRY);
6409 OPCODE(GET_CCMASK);
6410 OPCODE(MVC);
6411 OPCODE(NC);
6412 OPCODE(OC);
6413 OPCODE(XC);
6414 OPCODE(CLC);
6415 OPCODE(MEMSET_MVC);
6416 OPCODE(STPCPY);
6417 OPCODE(STRCMP);
6418 OPCODE(SEARCH_STRING);
6419 OPCODE(IPM);
6420 OPCODE(TBEGIN);
6421 OPCODE(TBEGIN_NOFLOAT);
6422 OPCODE(TEND);
6423 OPCODE(BYTE_MASK);
6424 OPCODE(ROTATE_MASK);
6425 OPCODE(REPLICATE);
6426 OPCODE(JOIN_DWORDS);
6427 OPCODE(SPLAT);
6428 OPCODE(MERGE_HIGH);
6429 OPCODE(MERGE_LOW);
6430 OPCODE(SHL_DOUBLE);
6431 OPCODE(PERMUTE_DWORDS);
6432 OPCODE(PERMUTE);
6433 OPCODE(PACK);
6434 OPCODE(PACKS_CC);
6435 OPCODE(PACKLS_CC);
6436 OPCODE(UNPACK_HIGH);
6437 OPCODE(UNPACKL_HIGH);
6438 OPCODE(UNPACK_LOW);
6439 OPCODE(UNPACKL_LOW);
6440 OPCODE(VSHL_BY_SCALAR);
6441 OPCODE(VSRL_BY_SCALAR);
6442 OPCODE(VSRA_BY_SCALAR);
6443 OPCODE(VROTL_BY_SCALAR);
6444 OPCODE(VSUM);
6445 OPCODE(VACC);
6446 OPCODE(VSCBI);
6447 OPCODE(VAC);
6448 OPCODE(VSBI);
6449 OPCODE(VACCC);
6450 OPCODE(VSBCBI);
6451 OPCODE(VICMPE);
6452 OPCODE(VICMPH);
6453 OPCODE(VICMPHL);
6454 OPCODE(VICMPES);
6455 OPCODE(VICMPHS);
6456 OPCODE(VICMPHLS);
6457 OPCODE(VFCMPE);
6458 OPCODE(STRICT_VFCMPE);
6459 OPCODE(STRICT_VFCMPES);
6460 OPCODE(VFCMPH);
6461 OPCODE(STRICT_VFCMPH);
6462 OPCODE(STRICT_VFCMPHS);
6463 OPCODE(VFCMPHE);
6464 OPCODE(STRICT_VFCMPHE);
6465 OPCODE(STRICT_VFCMPHES);
6466 OPCODE(VFCMPES);
6467 OPCODE(VFCMPHS);
6468 OPCODE(VFCMPHES);
6469 OPCODE(VFTCI);
6470 OPCODE(VEXTEND);
6471 OPCODE(STRICT_VEXTEND);
6472 OPCODE(VROUND);
6473 OPCODE(STRICT_VROUND);
6474 OPCODE(VTM);
6475 OPCODE(SCMP128HI);
6476 OPCODE(UCMP128HI);
6477 OPCODE(VFAE_CC);
6478 OPCODE(VFAEZ_CC);
6479 OPCODE(VFEE_CC);
6480 OPCODE(VFEEZ_CC);
6481 OPCODE(VFENE_CC);
6482 OPCODE(VFENEZ_CC);
6483 OPCODE(VISTR_CC);
6484 OPCODE(VSTRC_CC);
6485 OPCODE(VSTRCZ_CC);
6486 OPCODE(VSTRS_CC);
6487 OPCODE(VSTRSZ_CC);
6488 OPCODE(TDC);
6489 OPCODE(ATOMIC_SWAPW);
6490 OPCODE(ATOMIC_LOADW_ADD);
6491 OPCODE(ATOMIC_LOADW_SUB);
6492 OPCODE(ATOMIC_LOADW_AND);
6493 OPCODE(ATOMIC_LOADW_OR);
6494 OPCODE(ATOMIC_LOADW_XOR);
6495 OPCODE(ATOMIC_LOADW_NAND);
6496 OPCODE(ATOMIC_LOADW_MIN);
6497 OPCODE(ATOMIC_LOADW_MAX);
6498 OPCODE(ATOMIC_LOADW_UMIN);
6499 OPCODE(ATOMIC_LOADW_UMAX);
6500 OPCODE(ATOMIC_CMP_SWAPW);
6501 OPCODE(ATOMIC_CMP_SWAP);
6502 OPCODE(ATOMIC_LOAD_128);
6503 OPCODE(ATOMIC_STORE_128);
6504 OPCODE(ATOMIC_CMP_SWAP_128);
6505 OPCODE(LRV);
6506 OPCODE(STRV);
6507 OPCODE(VLER);
6508 OPCODE(VSTER);
6509 OPCODE(STCKF);
6510 OPCODE(PREFETCH);
6511 OPCODE(ADA_ENTRY);
6512 }
6513 return nullptr;
6514#undef OPCODE
6515}
6516
6517// Return true if VT is a vector whose elements are a whole number of bytes
6518// in width. Also check for presence of vector support.
6519bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
6520 if (!Subtarget.hasVector())
6521 return false;
6522
6523 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
6524}
6525
6526// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
6527// producing a result of type ResVT. Op is a possibly bitcast version
6528// of the input vector and Index is the index (based on type VecVT) that
6529// should be extracted. Return the new extraction if a simplification
6530// was possible or if Force is true.
6531SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
6532 EVT VecVT, SDValue Op,
6533 unsigned Index,
6534 DAGCombinerInfo &DCI,
6535 bool Force) const {
6536 SelectionDAG &DAG = DCI.DAG;
6537
6538 // The number of bytes being extracted.
6539 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6540
6541 for (;;) {
6542 unsigned Opcode = Op.getOpcode();
6543 if (Opcode == ISD::BITCAST)
6544 // Look through bitcasts.
6545 Op = Op.getOperand(0);
6546 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
6547 canTreatAsByteVector(Op.getValueType())) {
6548 // Get a VPERM-like permute mask and see whether the bytes covered
6549 // by the extracted element are a contiguous sequence from one
6550 // source operand.
6551 SmallVector<int, SystemZ::VectorBytes> Bytes;
6552 if (!getVPermMask(Op, Bytes))
6553 break;
6554 int First;
6555 if (!getShuffleInput(Bytes, Index * BytesPerElement,
6556 BytesPerElement, First))
6557 break;
6558 if (First < 0)
6559 return DAG.getUNDEF(ResVT);
6560 // Make sure the contiguous sequence starts at a multiple of the
6561 // original element size.
6562 unsigned Byte = unsigned(First) % Bytes.size();
6563 if (Byte % BytesPerElement != 0)
6564 break;
6565 // We can get the extracted value directly from an input.
6566 Index = Byte / BytesPerElement;
6567 Op = Op.getOperand(unsigned(First) / Bytes.size());
6568 Force = true;
6569 } else if (Opcode == ISD::BUILD_VECTOR &&
6570 canTreatAsByteVector(Op.getValueType())) {
6571 // We can only optimize this case if the BUILD_VECTOR elements are
6572 // at least as wide as the extracted value.
6573 EVT OpVT = Op.getValueType();
6574 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6575 if (OpBytesPerElement < BytesPerElement)
6576 break;
6577 // Make sure that the least-significant bit of the extracted value
6578 // is the least significant bit of an input.
6579 unsigned End = (Index + 1) * BytesPerElement;
6580 if (End % OpBytesPerElement != 0)
6581 break;
6582 // We're extracting the low part of one operand of the BUILD_VECTOR.
6583 Op = Op.getOperand(End / OpBytesPerElement - 1);
6584 if (!Op.getValueType().isInteger()) {
6585 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
6586 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
6587 DCI.AddToWorklist(Op.getNode());
6588 }
6589 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
6590 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
6591 if (VT != ResVT) {
6592 DCI.AddToWorklist(Op.getNode());
6593 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
6594 }
6595 return Op;
6596 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6597 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
6598 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6599 canTreatAsByteVector(Op.getValueType()) &&
6600 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
6601 // Make sure that only the unextended bits are significant.
6602 EVT ExtVT = Op.getValueType();
6603 EVT OpVT = Op.getOperand(0).getValueType();
6604 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
6605 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6606 unsigned Byte = Index * BytesPerElement;
6607 unsigned SubByte = Byte % ExtBytesPerElement;
6608 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
6609 if (SubByte < MinSubByte ||
6610 SubByte + BytesPerElement > ExtBytesPerElement)
6611 break;
6612 // Get the byte offset of the unextended element
6613 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
6614 // ...then add the byte offset relative to that element.
6615 Byte += SubByte - MinSubByte;
6616 if (Byte % BytesPerElement != 0)
6617 break;
6618 Op = Op.getOperand(0);
6619 Index = Byte / BytesPerElement;
6620 Force = true;
6621 } else
6622 break;
6623 }
6624 if (Force) {
6625 if (Op.getValueType() != VecVT) {
6626 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
6627 DCI.AddToWorklist(Op.getNode());
6628 }
6629 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
6630 DAG.getConstant(Index, DL, MVT::i32));
6631 }
6632 return SDValue();
6633}
6634
6635// Optimize vector operations in scalar value Op on the basis that Op
6636// is truncated to TruncVT.
6637SDValue SystemZTargetLowering::combineTruncateExtract(
6638 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
6639 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
6640 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
6641 // of type TruncVT.
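// For example, truncating (extract_vector_elt v4i32 X, 2) to i8 becomes an
// extraction of byte (2 + 1) * 4 - 1 = 11 from (v16i8 (bitcast X)), the
// least significant byte of element 2 on this big-endian target.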
6642 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6643 TruncVT.getSizeInBits() % 8 == 0) {
6644 SDValue Vec = Op.getOperand(0);
6645 EVT VecVT = Vec.getValueType();
6646 if (canTreatAsByteVector(VecVT)) {
6647 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
6648 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6649 unsigned TruncBytes = TruncVT.getStoreSize();
6650 if (BytesPerElement % TruncBytes == 0) {
6651 // Calculate the value of Y' in the above description. We are
6652 // splitting the original elements into Scale equal-sized pieces
6653 // and for truncation purposes want the last (least-significant)
6654 // of these pieces for IndexN. This is easiest to do by calculating
6655 // the start index of the following element and then subtracting 1.
6656 unsigned Scale = BytesPerElement / TruncBytes;
6657 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
6658
6659 // Defer the creation of the bitcast from X to combineExtract,
6660 // which might be able to optimize the extraction.
6661 VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
6662 VecVT.getStoreSize() / TruncBytes);
6663 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
6664 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
6665 }
6666 }
6667 }
6668 }
6669 return SDValue();
6670}
6671
6672SDValue SystemZTargetLowering::combineZERO_EXTEND(
6673 SDNode *N, DAGCombinerInfo &DCI) const {
6674 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
6675 SelectionDAG &DAG = DCI.DAG;
6676 SDValue N0 = N->getOperand(0);
6677 EVT VT = N->getValueType(0);
6678 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
6679 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
6680 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6681 if (TrueOp && FalseOp) {
6682 SDLoc DL(N0);
6683 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
6684 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
6685 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
6686 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
6687 // If N0 has multiple uses, change other uses as well.
6688 if (!N0.hasOneUse()) {
6689 SDValue TruncSelect =
6690 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
6691 DCI.CombineTo(N0.getNode(), TruncSelect);
6692 }
6693 return NewSelect;
6694 }
6695 }
6696 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
6697 // of the result is smaller than the size of X and all the truncated bits
6698 // of X are already zero.
6699 if (N0.getOpcode() == ISD::XOR &&
6700 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
6701 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6702 N0.getOperand(1).getOpcode() == ISD::Constant) {
6703 SDValue X = N0.getOperand(0).getOperand(0);
6704 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
6705 KnownBits Known = DAG.computeKnownBits(X);
6706 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
6707 N0.getValueSizeInBits(),
6708 VT.getSizeInBits());
6709 if (TruncatedBits.isSubsetOf(Known.Zero)) {
6710 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
6711 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
6712 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
6713 X, DAG.getConstant(Mask, SDLoc(N0), VT));
6714 }
6715 }
6716 }
6717
6718 return SDValue();
6719}
6720
6721SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
6722 SDNode *N, DAGCombinerInfo &DCI) const {
6723 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
6724 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
6725 // into (select_cc LHS, RHS, -1, 0, COND)
6726 SelectionDAG &DAG = DCI.DAG;
6727 SDValue N0 = N->getOperand(0);
6728 EVT VT = N->getValueType(0);
6729 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
6730 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
6731 N0 = N0.getOperand(0);
6732 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
6733 SDLoc DL(N0);
6734 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
6735 DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
6736 N0.getOperand(2) };
6737 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
6738 }
6739 return SDValue();
6740}
6741
6742SDValue SystemZTargetLowering::combineSIGN_EXTEND(
6743 SDNode *N, DAGCombinerInfo &DCI) const {
6744 // Convert (sext (ashr (shl X, C1), C2)) to
6745 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
6746 // cheap as narrower ones.
6747 SelectionDAG &DAG = DCI.DAG;
6748 SDValue N0 = N->getOperand(0);
6749 EVT VT = N->getValueType(0);
6750 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
6751 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6752 SDValue Inner = N0.getOperand(0);
6753 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
6754 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
6755 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
6756 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
6757 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
6758 EVT ShiftVT = N0.getOperand(1).getValueType();
6759 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
6760 Inner.getOperand(0));
6761 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
6762 DAG.getConstant(NewShlAmt, SDLoc(Inner),
6763 ShiftVT));
6764 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
6765 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
6766 }
6767 }
6768 }
6769
6770 return SDValue();
6771}
6772
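// Combine MERGE_HIGH/MERGE_LOW nodes whose first operand is a zero vector:
// fold (z_merge 0, 0) to 0, and rewrite (z_merge 0, X) as a logical unpack
// of X when the elements are at most four bytes wide.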
6773SDValue SystemZTargetLowering::combineMERGE(
6774 SDNode *N, DAGCombinerInfo &DCI) const {
6775 SelectionDAG &DAG = DCI.DAG;
6776 unsigned Opcode = N->getOpcode();
6777 SDValue Op0 = N->getOperand(0);
6778 SDValue Op1 = N->getOperand(1);
6779 if (Op0.getOpcode() == ISD::BITCAST)
6780 Op0 = Op0.getOperand(0);
6781 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
6782 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
6783 // for v4f32.
6784 if (Op1 == N->getOperand(0))
6785 return Op1;
6786 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
6787 EVT VT = Op1.getValueType();
6788 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
6789 if (ElemBytes <= 4) {
6790 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
6791   SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
6792 EVT InVT = VT.changeVectorElementTypeToInteger();
6793 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
6794 SystemZ::VectorBytes / ElemBytes / 2);
6795 if (VT != InVT) {
6796 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
6797 DCI.AddToWorklist(Op1.getNode());
6798 }
6799 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
6800 DCI.AddToWorklist(Op.getNode());
6801 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
6802 }
6803 }
6804 return SDValue();
6805}
6806
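// Return true if every use of the i128 value loaded by LD is a TRUNCATE to
// i64 of either the low half or the high half (the latter via a single-use
// SRL by 64).  The truncating users are returned in LoPart and HiPart.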
6807static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
6808 SDNode *&HiPart) {
6809 LoPart = HiPart = nullptr;
6810
6811 // Scan through all users.
6812 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6813 UI != UIEnd; ++UI) {
6814 // Skip the uses of the chain.
6815 if (UI.getUse().getResNo() != 0)
6816 continue;
6817
6818 // Verify every user is a TRUNCATE to i64 of the low or high half.
6819 SDNode *User = *UI;
6820 bool IsLoPart = true;
6821 if (User->getOpcode() == ISD::SRL &&
6822 User->getOperand(1).getOpcode() == ISD::Constant &&
6823 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
6824 User = *User->use_begin();
6825 IsLoPart = false;
6826 }
6827 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
6828 return false;
6829
6830 if (IsLoPart) {
6831 if (LoPart)
6832 return false;
6833 LoPart = User;
6834 } else {
6835 if (HiPart)
6836 return false;
6837 HiPart = User;
6838 }
6839 }
6840 return true;
6841}
6842
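// Return true if every use of the f128 value loaded by LD is a single-use
// EXTRACT_SUBREG selecting subreg_l64 or subreg_h64.  The extracting users
// are returned in LoPart and HiPart.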
6843static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
6844 SDNode *&HiPart) {
6845 LoPart = HiPart = nullptr;
6846
6847 // Scan through all users.
6848 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6849 UI != UIEnd; ++UI) {
6850 // Skip the uses of the chain.
6851 if (UI.getUse().getResNo() != 0)
6852 continue;
6853
6854 // Verify every user is an EXTRACT_SUBREG of the low or high half.
6855 SDNode *User = *UI;
6856 if (!User->hasOneUse() || !User->isMachineOpcode() ||
6857 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
6858 return false;
6859
6860 switch (User->getConstantOperandVal(1)) {
6861 case SystemZ::subreg_l64:
6862 if (LoPart)
6863 return false;
6864 LoPart = User;
6865 break;
6866 case SystemZ::subreg_h64:
6867 if (HiPart)
6868 return false;
6869 HiPart = User;
6870 break;
6871 default:
6872 return false;
6873 }
6874 }
6875 return true;
6876}
6877
6878SDValue SystemZTargetLowering::combineLOAD(
6879 SDNode *N, DAGCombinerInfo &DCI) const {
6880 SelectionDAG &DAG = DCI.DAG;
6881 EVT LdVT = N->getValueType(0);
6882 SDLoc DL(N);
6883
6884 // Replace a 128-bit load that is used solely to move its value into GPRs
6885 // by separate loads of both halves.
6886 LoadSDNode *LD = cast<LoadSDNode>(N);
6887 if (LD->isSimple() && ISD::isNormalLoad(LD)) {
6888 SDNode *LoPart, *HiPart;
6889 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
6890 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
6891 // Rewrite each extraction as an independent load.
6892 SmallVector<SDValue, 2> ArgChains;
6893 if (HiPart) {
6894 SDValue EltLoad = DAG.getLoad(
6895 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
6896 LD->getPointerInfo(), LD->getOriginalAlign(),
6897 LD->getMemOperand()->getFlags(), LD->getAAInfo());
6898
6899 DCI.CombineTo(HiPart, EltLoad, true);
6900 ArgChains.push_back(EltLoad.getValue(1));
6901 }
6902 if (LoPart) {
6903 SDValue EltLoad = DAG.getLoad(
6904 LoPart->getValueType(0), DL, LD->getChain(),
6905 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
6906 LD->getPointerInfo().getWithOffset(8), LD->getOriginalAlign(),
6907 LD->getMemOperand()->getFlags(), LD->getAAInfo());
6908
6909 DCI.CombineTo(LoPart, EltLoad, true);
6910 ArgChains.push_back(EltLoad.getValue(1));
6911 }
6912
6913 // Collect all chains via TokenFactor.
6914 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
6915 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
6916 DCI.AddToWorklist(Chain.getNode());
6917 return SDValue(N, 0);
6918 }
6919 }
6920
6921 if (LdVT.isVector() || LdVT.isInteger())
6922 return SDValue();
6923 // Transform a scalar load that is REPLICATEd as well as having other
6924 // use(s) to the form where the other use(s) use the first element of the
6925 // REPLICATE instead of the load. Otherwise instruction selection will not
6926 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
6927 // point loads.
6928
6929 SDValue Replicate;
6930 SmallVector<SDNode*, 8> OtherUses;
6931 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6932 UI != UE; ++UI) {
6933 if (UI->getOpcode() == SystemZISD::REPLICATE) {
6934 if (Replicate)
6935 return SDValue(); // Should never happen
6936 Replicate = SDValue(*UI, 0);
6937 }
6938 else if (UI.getUse().getResNo() == 0)
6939 OtherUses.push_back(*UI);
6940 }
6941 if (!Replicate || OtherUses.empty())
6942 return SDValue();
6943
6944 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
6945 Replicate, DAG.getConstant(0, DL, MVT::i32));
6946 // Update uses of the loaded Value while preserving old chains.
6947 for (SDNode *U : OtherUses) {
6948 SmallVector<SDValue, 8> Ops;
6949 for (SDValue Op : U->ops())
6950 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
6951 DAG.UpdateNodeOperands(U, Ops);
6952 }
6953 return SDValue(N, 0);
6954}
6955
6956bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
6957 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
6958 return true;
6959 if (Subtarget.hasVectorEnhancements2())
6960 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
6961 return true;
6962 return false;
6963}
6964
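// Return true if the shuffle mask M reverses the element order of the
// 128-bit vector type VT (undef indices are ignored), i.e. it describes the
// element swap performed by VLER/VSTER.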
6965static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
6966 if (!VT.isVector() || !VT.isSimple() ||
6967 VT.getSizeInBits() != 128 ||
6968 VT.getScalarSizeInBits() % 8 != 0)
6969 return false;
6970
6971 unsigned NumElts = VT.getVectorNumElements();
6972 for (unsigned i = 0; i < NumElts; ++i) {
6973 if (M[i] < 0) continue; // ignore UNDEF indices
6974 if ((unsigned) M[i] != NumElts - 1 - i)
6975 return false;
6976 }
6977
6978 return true;
6979}
6980
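// Return true if StoredVal is used only by stores of a round memory type of
// at most 16 bytes, or by splat BUILD_VECTORs that are themselves used only
// by such stores.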
6981static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
6982 for (auto *U : StoredVal->uses()) {
6983 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
6984 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
6985 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
6986 continue;
6987 } else if (isa<BuildVectorSDNode>(U)) {
6988 SDValue BuildVector = SDValue(U, 0);
6989 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
6990 isOnlyUsedByStores(BuildVector, DAG))
6991 continue;
6992 }
6993 return false;
6994 }
6995 return true;
6996}
6997
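// Return true if Val assembles an i128 value from two i64 parts as
// (or (zext Lo), (shl (anyext Hi), 64)).  The parts are returned in LoPart
// and HiPart.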
6998static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
6999 SDValue &HiPart) {
7000 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
7001 return false;
7002
7003 SDValue Op0 = Val.getOperand(0);
7004 SDValue Op1 = Val.getOperand(1);
7005
7006 if (Op0.getOpcode() == ISD::SHL)
7007 std::swap(Op0, Op1);
7008 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
7009 Op1.getOperand(1).getOpcode() != ISD::Constant ||
7010 Op1.getConstantOperandVal(1) != 64)
7011 return false;
7012 Op1 = Op1.getOperand(0);
7013
7014 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
7015 Op0.getOperand(0).getValueType() != MVT::i64)
7016 return false;
7017 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
7018 Op1.getOperand(0).getValueType() != MVT::i64)
7019 return false;
7020
7021 LoPart = Op0.getOperand(0);
7022 HiPart = Op1.getOperand(0);
7023 return true;
7024}
7025
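// Return true if Val builds an f128 value via a REG_SEQUENCE that places two
// 64-bit parts into subreg_l64 and subreg_h64.  The parts are returned in
// LoPart and HiPart.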
7026static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
7027 SDValue &HiPart) {
7028 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
7029 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
7030 return false;
7031
7032 if (Val->getNumOperands() != 5 ||
7033 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
7034 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
7035 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
7036 return false;
7037
7038 LoPart = Val->getOperand(1);
7039 HiPart = Val->getOperand(3);
7040 return true;
7041}
7042
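// Perform store-related combines: extract vector elements before truncating
// stores, fold byte-swapping stores into STRVH/STRV/STRVG/VSTBR and
// element-swapping stores into VSTER, fold READCYCLECOUNTER stores into
// STCKF, split stores of 128-bit values assembled from two 64-bit parts,
// and use VREP to replicate stored scalars where profitable.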
7043SDValue SystemZTargetLowering::combineSTORE(
7044 SDNode *N, DAGCombinerInfo &DCI) const {
7045 SelectionDAG &DAG = DCI.DAG;
7046 auto *SN = cast<StoreSDNode>(N);
7047 auto &Op1 = N->getOperand(1);
7048 EVT MemVT = SN->getMemoryVT();
7049 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
7050 // for the extraction to be done on a vMiN value, so that we can use VSTE.
7051 // If X has wider elements then convert it to:
7052 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
7053 if (MemVT.isInteger() && SN->isTruncatingStore()) {
7054 if (SDValue Value =
7055 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
7056 DCI.AddToWorklist(Value.getNode());
7057
7058 // Rewrite the store with the new form of stored value.
7059 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
7060 SN->getBasePtr(), SN->getMemoryVT(),
7061 SN->getMemOperand());
7062 }
7063 }
7064 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
7065 if (!SN->isTruncatingStore() &&
7066 Op1.getOpcode() == ISD::BSWAP &&
7067 Op1.getNode()->hasOneUse() &&
7068 canLoadStoreByteSwapped(Op1.getValueType())) {
7069
7070 SDValue BSwapOp = Op1.getOperand(0);
7071
7072 if (BSwapOp.getValueType() == MVT::i16)
7073 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
7074
7075 SDValue Ops[] = {
7076 N->getOperand(0), BSwapOp, N->getOperand(2)
7077 };
7078
7079 return
7080 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
7081 Ops, MemVT, SN->getMemOperand());
7082 }
7083 // Combine STORE (element-swap) into VSTER
7084 if (!SN->isTruncatingStore() &&
7085 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
7086 Op1.getNode()->hasOneUse() &&
7087 Subtarget.hasVectorEnhancements2()) {
7088 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
7089 ArrayRef<int> ShuffleMask = SVN->getMask();
7090 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
7091 SDValue Ops[] = {
7092 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
7093 };
7094
7095 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
7096 DAG.getVTList(MVT::Other),
7097 Ops, MemVT, SN->getMemOperand());
7098 }
7099 }
7100
7101 // Combine STORE (READCYCLECOUNTER) into STCKF.
7102 if (!SN->isTruncatingStore() &&
7103 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
7104 Op1.hasOneUse() &&
7105 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
7106 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
7107 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
7108 DAG.getVTList(MVT::Other),
7109 Ops, MemVT, SN->getMemOperand());
7110 }
7111
7112 // Transform a store of a 128-bit value moved from parts into two stores.
7113 if (SN->isSimple() && ISD::isNormalStore(SN)) {
7114 SDValue LoPart, HiPart;
7115 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
7116 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
7117 SDLoc DL(SN);
7118 SDValue Chain0 =
7119 DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),
7120 SN->getPointerInfo(), SN->getOriginalAlign(),
7121 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7122 SDValue Chain1 =
7123 DAG.getStore(SN->getChain(), DL, LoPart,
7124 DAG.getObjectPtrOffset(DL, SN->getBasePtr(),
7125    TypeSize::getFixed(8)),
7126 SN->getPointerInfo().getWithOffset(8),
7127 SN->getOriginalAlign(),
7128 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7129
7130 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
7131 }
7132 }
7133
7134 // Replicate a reg or immediate with VREP instead of scalar multiply or
7135 // immediate load. It seems best to do this during the first DAGCombine as
7136 // it is straightforward to handle the zero-extend node in the initial
7137 // DAG, and also to not worry about keeping the new MemVT legal (e.g. when
7138 // extracting an i16 element from a v16i8 vector).
7139 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
7140 isOnlyUsedByStores(Op1, DAG)) {
7141 SDValue Word = SDValue();
7142 EVT WordVT;
7143
7144 // Find a replicated immediate and return it if found in Word and its
7145 // type in WordVT.
7146 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
7147 // Some constants are better handled with a scalar store.
7148 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
7149 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
7150 return;
7151 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue()));
7152 if (VCI.isVectorConstantLegal(Subtarget) &&
7153 VCI.Opcode == SystemZISD::REPLICATE) {
7154 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
7155 WordVT = VCI.VecVT.getScalarType();
7156 }
7157 };
7158
7159 // Find a replicated register and return it if found in Word and its type
7160 // in WordVT.
7161 auto FindReplicatedReg = [&](SDValue MulOp) {
7162 EVT MulVT = MulOp.getValueType();
7163 if (MulOp->getOpcode() == ISD::MUL &&
7164 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
7165 // Find a zero extended value and its type.
7166 SDValue LHS = MulOp->getOperand(0);
7167 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
7168 WordVT = LHS->getOperand(0).getValueType();
7169 else if (LHS->getOpcode() == ISD::AssertZext)
7170 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
7171 else
7172 return;
7173 // Find a replicating constant, e.g. 0x00010001.
7174 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
7175 SystemZVectorConstantInfo VCI(
7176 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
7177 if (VCI.isVectorConstantLegal(Subtarget) &&
7178 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
7179 WordVT == VCI.VecVT.getScalarType())
7180 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
7181 }
7182 }
7183 };
7184
7185 if (isa<BuildVectorSDNode>(Op1) &&
7186 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
7187 SDValue SplatVal = Op1->getOperand(0);
7188 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
7189 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
7190 else
7191 FindReplicatedReg(SplatVal);
7192 } else {
7193 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
7194 FindReplicatedImm(C, MemVT.getStoreSize());
7195 else
7196 FindReplicatedReg(Op1);
7197 }
7198
7199 if (Word != SDValue()) {
7200 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
7201 "Bad type handling");
7202 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
7203 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
7204 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
7205 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
7206 SN->getBasePtr(), SN->getMemOperand());
7207 }
7208 }
7209
7210 return SDValue();
7211}
7212
7213SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
7214 SDNode *N, DAGCombinerInfo &DCI) const {
7215 SelectionDAG &DAG = DCI.DAG;
7216 // Combine element-swap (LOAD) into VLER
7217 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7218 N->getOperand(0).hasOneUse() &&
7219 Subtarget.hasVectorEnhancements2()) {
7220 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
7221 ArrayRef<int> ShuffleMask = SVN->getMask();
7222 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
7223 SDValue Load = N->getOperand(0);
7224 LoadSDNode *LD = cast<LoadSDNode>(Load);
7225
7226 // Create the element-swapping load.
7227 SDValue Ops[] = {
7228 LD->getChain(), // Chain
7229 LD->getBasePtr() // Ptr
7230 };
7231 SDValue ESLoad =
7232 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
7233 DAG.getVTList(LD->getValueType(0), MVT::Other),
7234 Ops, LD->getMemoryVT(), LD->getMemOperand());
7235
7236 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
7237 // by the load dead.
7238 DCI.CombineTo(N, ESLoad);
7239
7240 // Next, combine the load away; we give it a bogus result value but a real
7241 // chain result. The result value is dead because the shuffle is dead.
7242 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
7243
7244 // Return N so it doesn't get rechecked!
7245 return SDValue(N, 0);
7246 }
7247 }
7248
7249 return SDValue();
7250}
7251
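// Combine EXTRACT_VECTOR_ELT nodes: look through bitcasts that keep the
// element count, pull a BSWAP of the source vector out of the extraction,
// and simplify extractions with a constant index via combineExtract().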
7252SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
7253 SDNode *N, DAGCombinerInfo &DCI) const {
7254 SelectionDAG &DAG = DCI.DAG;
7255
7256 if (!Subtarget.hasVector())
7257 return SDValue();
7258
7259 // Look through bitcasts that retain the number of vector elements.
7260 SDValue Op = N->getOperand(0);
7261 if (Op.getOpcode() == ISD::BITCAST &&
7262 Op.getValueType().isVector() &&
7263 Op.getOperand(0).getValueType().isVector() &&
7264 Op.getValueType().getVectorNumElements() ==
7265 Op.getOperand(0).getValueType().getVectorNumElements())
7266 Op = Op.getOperand(0);
7267
7268 // Pull BSWAP out of a vector extraction.
7269 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
7270 EVT VecVT = Op.getValueType();
7271 EVT EltVT = VecVT.getVectorElementType();
7272 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
7273 Op.getOperand(0), N->getOperand(1));
7274 DCI.AddToWorklist(Op.getNode());
7275 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
7276 if (EltVT != N->getValueType(0)) {
7277 DCI.AddToWorklist(Op.getNode());
7278 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
7279 }
7280 return Op;
7281 }
7282
7283 // Try to simplify a vector extraction.
7284 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
7285 SDValue Op0 = N->getOperand(0);
7286 EVT VecVT = Op0.getValueType();
7287 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
7288 IndexN->getZExtValue(), DCI, false);
7289 }
7290 return SDValue();
7291}
7292
7293SDValue SystemZTargetLowering::combineJOIN_DWORDS(
7294 SDNode *N, DAGCombinerInfo &DCI) const {
7295 SelectionDAG &DAG = DCI.DAG;
7296 // (join_dwords X, X) == (replicate X)
7297 if (N->getOperand(0) == N->getOperand(1))
7298 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
7299 N->getOperand(0));
7300 return SDValue();
7301}
7302
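// Return the common input chain of N1 and N2 if both nodes use the same
// chain, or a null SDValue otherwise.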
7303static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
7304 SDValue Chain1 = N1->getOperand(0);
7305 SDValue Chain2 = N2->getOperand(0);
7306
7307 // Trivial case: both nodes take the same chain.
7308 if (Chain1 == Chain2)
7309 return Chain1;
7310
7311 // FIXME - we could handle more complex cases via TokenFactor,
7312 // assuming we can verify that this would not create a cycle.
7313 return SDValue();
7314}
7315
7316SDValue SystemZTargetLowering::combineFP_ROUND(
7317 SDNode *N, DAGCombinerInfo &DCI) const {
7318
7319 if (!Subtarget.hasVector())
7320 return SDValue();
7321
7322 // (fpround (extract_vector_elt X 0))
7323 // (fpround (extract_vector_elt X 1)) ->
7324 // (extract_vector_elt (VROUND X) 0)
7325 // (extract_vector_elt (VROUND X) 2)
7326 //
7327 // This is a special case since the target doesn't really support v2f32s.
7328 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7329 SelectionDAG &DAG = DCI.DAG;
7330 SDValue Op0 = N->getOperand(OpNo);
7331 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
7332 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7333 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
7334 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7335 Op0.getConstantOperandVal(1) == 0) {
7336 SDValue Vec = Op0.getOperand(0);
7337 for (auto *U : Vec->uses()) {
7338 if (U != Op0.getNode() && U->hasOneUse() &&
7339 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7340 U->getOperand(0) == Vec &&
7341 U->getOperand(1).getOpcode() == ISD::Constant &&
7342 U->getConstantOperandVal(1) == 1) {
7343 SDValue OtherRound = SDValue(*U->use_begin(), 0);
7344 if (OtherRound.getOpcode() == N->getOpcode() &&
7345 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
7346 OtherRound.getValueType() == MVT::f32) {
7347 SDValue VRound, Chain;
7348 if (N->isStrictFPOpcode()) {
7349 Chain = MergeInputChains(N, OtherRound.getNode());
7350 if (!Chain)
7351 continue;
7352 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
7353 {MVT::v4f32, MVT::Other}, {Chain, Vec});
7354 Chain = VRound.getValue(1);
7355 } else
7356 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
7357 MVT::v4f32, Vec);
7358 DCI.AddToWorklist(VRound.getNode());
7359 SDValue Extract1 =
7360 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
7361 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
7362 DCI.AddToWorklist(Extract1.getNode());
7363 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
7364 if (Chain)
7365 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
7366 SDValue Extract0 =
7367 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
7368 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7369 if (Chain)
7370 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7371 N->getVTList(), Extract0, Chain);
7372 return Extract0;
7373 }
7374 }
7375 }
7376 }
7377 return SDValue();
7378}
7379
7380SDValue SystemZTargetLowering::combineFP_EXTEND(
7381 SDNode *N, DAGCombinerInfo &DCI) const {
7382
7383 if (!Subtarget.hasVector())
7384 return SDValue();
7385
7386 // (fpextend (extract_vector_elt X 0))
7387 // (fpextend (extract_vector_elt X 2)) ->
7388 // (extract_vector_elt (VEXTEND X) 0)
7389 // (extract_vector_elt (VEXTEND X) 1)
7390 //
7391 // This is a special case since the target doesn't really support v2f32s.
7392 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7393 SelectionDAG &DAG = DCI.DAG;
7394 SDValue Op0 = N->getOperand(OpNo);
7395 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
7396 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7397 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
7398 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7399 Op0.getConstantOperandVal(1) == 0) {
7400 SDValue Vec = Op0.getOperand(0);
7401 for (auto *U : Vec->uses()) {
7402 if (U != Op0.getNode() && U->hasOneUse() &&
7403 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7404 U->getOperand(0) == Vec &&
7405 U->getOperand(1).getOpcode() == ISD::Constant &&
7406 U->getConstantOperandVal(1) == 2) {
7407 SDValue OtherExtend = SDValue(*U->use_begin(), 0);
7408 if (OtherExtend.getOpcode() == N->getOpcode() &&
7409 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
7410 OtherExtend.getValueType() == MVT::f64) {
7411 SDValue VExtend, Chain;
7412 if (N->isStrictFPOpcode()) {
7413 Chain = MergeInputChains(N, OtherExtend.getNode());
7414 if (!Chain)
7415 continue;
7416 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
7417 {MVT::v2f64, MVT::Other}, {Chain, Vec});
7418 Chain = VExtend.getValue(1);
7419 } else
7420 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
7421 MVT::v2f64, Vec);
7422 DCI.AddToWorklist(VExtend.getNode());
7423 SDValue Extract1 =
7424 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
7425 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
7426 DCI.AddToWorklist(Extract1.getNode());
7427 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
7428 if (Chain)
7429 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
7430 SDValue Extract0 =
7431 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
7432 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7433 if (Chain)
7434 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7435 N->getVTList(), Extract0, Chain);
7436 return Extract0;
7437 }
7438 }
7439 }
7440 }
7441 return SDValue();
7442}
7443
7444SDValue SystemZTargetLowering::combineINT_TO_FP(
7445 SDNode *N, DAGCombinerInfo &DCI) const {
7446 if (DCI.Level != BeforeLegalizeTypes)
7447 return SDValue();
7448 SelectionDAG &DAG = DCI.DAG;
7449 LLVMContext &Ctx = *DAG.getContext();
7450 unsigned Opcode = N->getOpcode();
7451 EVT OutVT = N->getValueType(0);
7452 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
7453 SDValue Op = N->getOperand(0);
7454 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
7455 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
7456
7457 // Insert an extension before type-legalization to avoid scalarization, e.g.:
7458 // v2f64 = uint_to_fp v2i16
7459 // =>
7460 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
7461 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
7462 OutScalarBits <= 64) {
7463 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
7464 EVT ExtVT = EVT::getVectorVT(
7465 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
7466 unsigned ExtOpcode =
7467     (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
7468 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
7469 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
7470 }
7471 return SDValue();
7472}
7473
7474SDValue SystemZTargetLowering::combineBSWAP(
7475 SDNode *N, DAGCombinerInfo &DCI) const {
7476 SelectionDAG &DAG = DCI.DAG;
7477 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
7478 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7479 N->getOperand(0).hasOneUse() &&
7480 canLoadStoreByteSwapped(N->getValueType(0))) {
7481 SDValue Load = N->getOperand(0);
7482 LoadSDNode *LD = cast<LoadSDNode>(Load);
7483
7484 // Create the byte-swapping load.
7485 SDValue Ops[] = {
7486 LD->getChain(), // Chain
7487 LD->getBasePtr() // Ptr
7488 };
7489 EVT LoadVT = N->getValueType(0);
7490 if (LoadVT == MVT::i16)
7491 LoadVT = MVT::i32;
7492 SDValue BSLoad =
7493 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
7494 DAG.getVTList(LoadVT, MVT::Other),
7495 Ops, LD->getMemoryVT(), LD->getMemOperand());
7496
7497 // If this is an i16 load, insert the truncate.
7498 SDValue ResVal = BSLoad;
7499 if (N->getValueType(0) == MVT::i16)
7500 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
7501
7502 // First, combine the bswap away. This makes the value produced by the
7503 // load dead.
7504 DCI.CombineTo(N, ResVal);
7505
7506 // Next, combine the load away; we give it a bogus result value but a real
7507 // chain result. The result value is dead because the bswap is dead.
7508 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
7509
7510 // Return N so it doesn't get rechecked!
7511 return SDValue(N, 0);
7512 }
7513
7514 // Look through bitcasts that retain the number of vector elements.
7515 SDValue Op = N->getOperand(0);
7516 if (Op.getOpcode() == ISD::BITCAST &&
7517 Op.getValueType().isVector() &&
7518 Op.getOperand(0).getValueType().isVector() &&
7519 Op.getValueType().getVectorNumElements() ==
7520 Op.getOperand(0).getValueType().getVectorNumElements())
7521 Op = Op.getOperand(0);
7522
7523 // Push BSWAP into a vector insertion if at least one side then simplifies.
7524 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
7525 SDValue Vec = Op.getOperand(0);
7526 SDValue Elt = Op.getOperand(1);
7527 SDValue Idx = Op.getOperand(2);
7528
7529 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
7530 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
7531 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
7532 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
7533 (canLoadStoreByteSwapped(N->getValueType(0)) &&
7534 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
7535 EVT VecVT = N->getValueType(0);
7536 EVT EltVT = N->getValueType(0).getVectorElementType();
7537 if (VecVT != Vec.getValueType()) {
7538 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
7539 DCI.AddToWorklist(Vec.getNode());
7540 }
7541 if (EltVT != Elt.getValueType()) {
7542 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
7543 DCI.AddToWorklist(Elt.getNode());
7544 }
7545 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
7546 DCI.AddToWorklist(Vec.getNode());
7547 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
7548 DCI.AddToWorklist(Elt.getNode());
7549 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
7550 Vec, Elt, Idx);
7551 }
7552 }
7553
7554 // Push BSWAP into a vector shuffle if at least one side then simplifies.
7555 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
7556 if (SV && Op.hasOneUse()) {
7557 SDValue Op0 = Op.getOperand(0);
7558 SDValue Op1 = Op.getOperand(1);
7559
7560 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
7561 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
7562 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
7563 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
7564 EVT VecVT = N->getValueType(0);
7565 if (VecVT != Op0.getValueType()) {
7566 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
7567 DCI.AddToWorklist(Op0.getNode());
7568 }
7569 if (VecVT != Op1.getValueType()) {
7570 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
7571 DCI.AddToWorklist(Op1.getNode());
7572 }
7573 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
7574 DCI.AddToWorklist(Op0.getNode());
7575 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
7576 DCI.AddToWorklist(Op1.getNode());
7577 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
7578 }
7579 }
7580
7581 return SDValue();
7582}
7583
7584static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
7585 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
7586 // set by the CCReg instruction using the CCValid / CCMask masks.
7587 // If the CCReg instruction is itself an ICMP testing the condition
7588 // code set by some other instruction, see whether we can directly
7589 // use that condition code.
7590
7591 // Verify that we have an ICMP against some constant.
7592 if (CCValid != SystemZ::CCMASK_ICMP)
7593 return false;
7594 auto *ICmp = CCReg.getNode();
7595 if (ICmp->getOpcode() != SystemZISD::ICMP)
7596 return false;
7597 auto *CompareLHS = ICmp->getOperand(0).getNode();
7598 auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
7599 if (!CompareRHS)
7600 return false;
7601
7602 // Optimize the case where CompareLHS is a SELECT_CCMASK.
7603 if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
7604 // Verify that we have an appropriate mask for a EQ or NE comparison.
7605 bool Invert = false;
7606 if (CCMask == SystemZ::CCMASK_CMP_NE)
7607 Invert = !Invert;
7608 else if (CCMask != SystemZ::CCMASK_CMP_EQ)
7609 return false;
7610
7611 // Verify that the ICMP compares against one of the select values.
7612 auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
7613 if (!TrueVal)
7614 return false;
7615 auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7616 if (!FalseVal)
7617 return false;
7618 if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
7619 Invert = !Invert;
7620 else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
7621 return false;
7622
7623 // Compute the effective CC mask for the new branch or select.
7624 auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
7625 auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
7626 if (!NewCCValid || !NewCCMask)
7627 return false;
7628 CCValid = NewCCValid->getZExtValue();
7629 CCMask = NewCCMask->getZExtValue();
7630 if (Invert)
7631 CCMask ^= CCValid;
7632
7633 // Return the updated CCReg link.
7634 CCReg = CompareLHS->getOperand(4);
7635 return true;
7636 }
7637
7638 // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
7639 if (CompareLHS->getOpcode() == ISD::SRA) {
7640 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7641 if (!SRACount || SRACount->getZExtValue() != 30)
7642 return false;
7643 auto *SHL = CompareLHS->getOperand(0).getNode();
7644 if (SHL->getOpcode() != ISD::SHL)
7645 return false;
7646 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
7647 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
7648 return false;
7649 auto *IPM = SHL->getOperand(0).getNode();
7650 if (IPM->getOpcode() != SystemZISD::IPM)
7651 return false;
7652
7653 // Avoid introducing CC spills (because SRA would clobber CC).
7654 if (!CompareLHS->hasOneUse())
7655 return false;
7656 // Verify that the ICMP compares against zero.
7657 if (CompareRHS->getZExtValue() != 0)
7658 return false;
7659
7660 // Compute the effective CC mask for the new branch or select.
7661 CCMask = SystemZ::reverseCCMask(CCMask);
7662
7663 // Return the updated CCReg link.
7664 CCReg = IPM->getOperand(0);
7665 return true;
7666 }
7667
7668 return false;
7669}
7670
7671SDValue SystemZTargetLowering::combineBR_CCMASK(
7672 SDNode *N, DAGCombinerInfo &DCI) const {
7673 SelectionDAG &DAG = DCI.DAG;
7674
7675 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
7676 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7677 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7678 if (!CCValid || !CCMask)
7679 return SDValue();
7680
7681 int CCValidVal = CCValid->getZExtValue();
7682 int CCMaskVal = CCMask->getZExtValue();
7683 SDValue Chain = N->getOperand(0);
7684 SDValue CCReg = N->getOperand(4);
7685
7686 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7687 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
7688 Chain,
7689 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7690 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7691 N->getOperand(3), CCReg);
7692 return SDValue();
7693}
7694
7695SDValue SystemZTargetLowering::combineSELECT_CCMASK(
7696 SDNode *N, DAGCombinerInfo &DCI) const {
7697 SelectionDAG &DAG = DCI.DAG;
7698
7699 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
7700 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
7701 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
7702 if (!CCValid || !CCMask)
7703 return SDValue();
7704
7705 int CCValidVal = CCValid->getZExtValue();
7706 int CCMaskVal = CCMask->getZExtValue();
7707 SDValue CCReg = N->getOperand(4);
7708
7709 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7710 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
7711 N->getOperand(0), N->getOperand(1),
7712 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7713 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7714 CCReg);
7715 return SDValue();
7716}
7717
7718
7719SDValue SystemZTargetLowering::combineGET_CCMASK(
7720 SDNode *N, DAGCombinerInfo &DCI) const {
7721
7722 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
7723 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7724 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7725 if (!CCValid || !CCMask)
7726 return SDValue();
7727 int CCValidVal = CCValid->getZExtValue();
7728 int CCMaskVal = CCMask->getZExtValue();
7729
7730 SDValue Select = N->getOperand(0);
7731 if (Select->getOpcode() == ISD::TRUNCATE)
7732 Select = Select->getOperand(0);
7733 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
7734 return SDValue();
7735
7736 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
7737 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
7738 if (!SelectCCValid || !SelectCCMask)
7739 return SDValue();
7740 int SelectCCValidVal = SelectCCValid->getZExtValue();
7741 int SelectCCMaskVal = SelectCCMask->getZExtValue();
7742
7743 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
7744 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
7745 if (!TrueVal || !FalseVal)
7746 return SDValue();
7747 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
7748 ;
7749 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
7750 SelectCCMaskVal ^= SelectCCValidVal;
7751 else
7752 return SDValue();
7753
7754 if (SelectCCValidVal & ~CCValidVal)
7755 return SDValue();
7756 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
7757 return SDValue();
7758
7759 return Select->getOperand(4);
7760}
7761
7762SDValue SystemZTargetLowering::combineIntDIVREM(
7763 SDNode *N, DAGCombinerInfo &DCI) const {
7764 SelectionDAG &DAG = DCI.DAG;
7765 EVT VT = N->getValueType(0);
7766 // In the case where the divisor is a vector of constants a cheaper
7767 // sequence of instructions can replace the divide. BuildSDIV is called to
7768 // do this during DAG combining, but it only succeeds when it can build a
7769 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
7770 // since it is not Legal but Custom it can only happen before
7771 // legalization. Therefore we must scalarize this early before Combine
7772 // 1. For widened vectors, this is already the result of type legalization.
7773 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
7774 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
7775 return DAG.UnrollVectorOp(N);
7776 return SDValue();
7777}
7778
7779SDValue SystemZTargetLowering::combineINTRINSIC(
7780 SDNode *N, DAGCombinerInfo &DCI) const {
7781 SelectionDAG &DAG = DCI.DAG;
7782
7783 unsigned Id = N->getConstantOperandVal(1);
7784 switch (Id) {
7785 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
7786 // or larger is simply a vector load.
7787 case Intrinsic::s390_vll:
7788 case Intrinsic::s390_vlrl:
7789 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
7790 if (C->getZExtValue() >= 15)
7791 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
7792 N->getOperand(3), MachinePointerInfo());
7793 break;
7794 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
7795 case Intrinsic::s390_vstl:
7796 case Intrinsic::s390_vstrl:
7797 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
7798 if (C->getZExtValue() >= 15)
7799 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
7800 N->getOperand(4), MachinePointerInfo());
7801 break;
7802 }
7803
7804 return SDValue();
7805}
7806
7807SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
7808 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
7809 return N->getOperand(0);
7810 return N;
7811}
7812
7813SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
7814 DAGCombinerInfo &DCI) const {
7815 switch(N->getOpcode()) {
7816 default: break;
7817 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
7818 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
7819 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
7820 case SystemZISD::MERGE_HIGH:
7821 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
7822 case ISD::LOAD: return combineLOAD(N, DCI);
7823 case ISD::STORE: return combineSTORE(N, DCI);
7824 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
7825 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
7826 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
7827 case ISD::STRICT_FP_ROUND:
7828 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
7829 case ISD::STRICT_FP_EXTEND:
7830 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
7831 case ISD::SINT_TO_FP:
7832 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
7833 case ISD::BSWAP: return combineBSWAP(N, DCI);
7834 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
7835 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
7836 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
7837 case ISD::SDIV:
7838 case ISD::UDIV:
7839 case ISD::SREM:
7840 case ISD::UREM: return combineIntDIVREM(N, DCI);
7841 case ISD::INTRINSIC_W_CHAIN:
7842 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
7843 }
7844
7845 return SDValue();
7846}
7847
7848// Return the demanded elements for the OpNo source operand of Op. DemandedElts
7849// are for Op.
7850static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
7851 unsigned OpNo) {
7852 EVT VT = Op.getValueType();
7853 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
7854 APInt SrcDemE;
7855 unsigned Opcode = Op.getOpcode();
7856 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7857 unsigned Id = Op.getConstantOperandVal(0);
7858 switch (Id) {
7859 case Intrinsic::s390_vpksh: // PACKS
7860 case Intrinsic::s390_vpksf:
7861 case Intrinsic::s390_vpksg:
7862 case Intrinsic::s390_vpkshs: // PACKS_CC
7863 case Intrinsic::s390_vpksfs:
7864 case Intrinsic::s390_vpksgs:
7865 case Intrinsic::s390_vpklsh: // PACKLS
7866 case Intrinsic::s390_vpklsf:
7867 case Intrinsic::s390_vpklsg:
7868 case Intrinsic::s390_vpklshs: // PACKLS_CC
7869 case Intrinsic::s390_vpklsfs:
7870 case Intrinsic::s390_vpklsgs:
7871 // VECTOR PACK truncates the elements of two source vectors into one.
7872 SrcDemE = DemandedElts;
7873 if (OpNo == 2)
7874 SrcDemE.lshrInPlace(NumElts / 2);
7875 SrcDemE = SrcDemE.trunc(NumElts / 2);
7876 break;
7877 // VECTOR UNPACK extends half the elements of the source vector.
7878 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7879 case Intrinsic::s390_vuphh:
7880 case Intrinsic::s390_vuphf:
7881 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7882 case Intrinsic::s390_vuplhh:
7883 case Intrinsic::s390_vuplhf:
7884 SrcDemE = APInt(NumElts * 2, 0);
7885 SrcDemE.insertBits(DemandedElts, 0);
7886 break;
7887 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7888 case Intrinsic::s390_vuplhw:
7889 case Intrinsic::s390_vuplf:
7890 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7891 case Intrinsic::s390_vupllh:
7892 case Intrinsic::s390_vupllf:
7893 SrcDemE = APInt(NumElts * 2, 0);
7894 SrcDemE.insertBits(DemandedElts, NumElts);
7895 break;
7896 case Intrinsic::s390_vpdi: {
7897 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
7898 SrcDemE = APInt(NumElts, 0);
7899 if (!DemandedElts[OpNo - 1])
7900 break;
7901 unsigned Mask = Op.getConstantOperandVal(3);
7902 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
7903 // Demand input element 0 or 1, given by the mask bit value.
7904 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
7905 break;
7906 }
7907 case Intrinsic::s390_vsldb: {
7908 // VECTOR SHIFT LEFT DOUBLE BY BYTE
7909 assert(VT == MVT::v16i8 && "Unexpected type.");
7910 unsigned FirstIdx = Op.getConstantOperandVal(3);
7911 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
7912 unsigned NumSrc0Els = 16 - FirstIdx;
7913 SrcDemE = APInt(NumElts, 0);
7914 if (OpNo == 1) {
7915 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
7916 SrcDemE.insertBits(DemEls, FirstIdx);
7917 } else {
7918 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
7919 SrcDemE.insertBits(DemEls, 0);
7920 }
7921 break;
7922 }
7923 case Intrinsic::s390_vperm:
7924 SrcDemE = APInt(NumElts, -1);
7925 break;
7926 default:
7927 llvm_unreachable("Unhandled intrinsic.");
7928 break;
7929 }
7930 } else {
7931 switch (Opcode) {
7932 case SystemZISD::JOIN_DWORDS:
7933 // Scalar operand.
7934 SrcDemE = APInt(1, 1);
7935 break;
7936 case SystemZISD::SELECT_CCMASK:
7937 SrcDemE = DemandedElts;
7938 break;
7939 default:
7940 llvm_unreachable("Unhandled opcode.");
7941 break;
7942 }
7943 }
7944 return SrcDemE;
7945}
7946
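// Helper for computeKnownBitsForTargetNode(): intersect the known bits of
// the two source operands of Op starting at operand OpNo, considering only
// the source elements that are demanded for Op.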
7947static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
7948 const APInt &DemandedElts,
7949 const SelectionDAG &DAG, unsigned Depth,
7950 unsigned OpNo) {
7951 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
7952 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
7953 KnownBits LHSKnown =
7954 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
7955 KnownBits RHSKnown =
7956 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
7957 Known = LHSKnown.intersectWith(RHSKnown);
7958}
7959
7960void
7961SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
7962 KnownBits &Known,
7963 const APInt &DemandedElts,
7964 const SelectionDAG &DAG,
7965 unsigned Depth) const {
7966 Known.resetAll();
7967
7968 // Intrinsic CC result is returned in the two low bits.
7969 unsigned tmp0, tmp1; // not used
7970 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
7971 Known.Zero.setBitsFrom(2);
7972 return;
7973 }
7974 EVT VT = Op.getValueType();
7975 if (Op.getResNo() != 0 || VT == MVT::Untyped)
7976 return;
7977 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
7978 "KnownBits does not match VT in bitwidth");
7979 assert ((!VT.isVector() ||
7980 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
7981 "DemandedElts does not match VT number of elements");
7982 unsigned BitWidth = Known.getBitWidth();
7983 unsigned Opcode = Op.getOpcode();
7984 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7985 bool IsLogical = false;
7986 unsigned Id = Op.getConstantOperandVal(0);
7987 switch (Id) {
7988 case Intrinsic::s390_vpksh: // PACKS
7989 case Intrinsic::s390_vpksf:
7990 case Intrinsic::s390_vpksg:
7991 case Intrinsic::s390_vpkshs: // PACKS_CC
7992 case Intrinsic::s390_vpksfs:
7993 case Intrinsic::s390_vpksgs:
7994 case Intrinsic::s390_vpklsh: // PACKLS
7995 case Intrinsic::s390_vpklsf:
7996 case Intrinsic::s390_vpklsg:
7997 case Intrinsic::s390_vpklshs: // PACKLS_CC
7998 case Intrinsic::s390_vpklsfs:
7999 case Intrinsic::s390_vpklsgs:
8000 case Intrinsic::s390_vpdi:
8001 case Intrinsic::s390_vsldb:
8002 case Intrinsic::s390_vperm:
8003 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
8004 break;
8005 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
8006 case Intrinsic::s390_vuplhh:
8007 case Intrinsic::s390_vuplhf:
8008 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
8009 case Intrinsic::s390_vupllh:
8010 case Intrinsic::s390_vupllf:
8011 IsLogical = true;
8012 [[fallthrough]];
8013 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8014 case Intrinsic::s390_vuphh:
8015 case Intrinsic::s390_vuphf:
8016 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8017 case Intrinsic::s390_vuplhw:
8018 case Intrinsic::s390_vuplf: {
8019 SDValue SrcOp = Op.getOperand(1);
8020 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
8021 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
8022 if (IsLogical) {
8023 Known = Known.zext(BitWidth);
8024 } else
8025 Known = Known.sext(BitWidth);
8026 break;
8027 }
8028 default:
8029 break;
8030 }
8031 } else {
8032 switch (Opcode) {
8033 case SystemZISD::JOIN_DWORDS:
8034 case SystemZISD::SELECT_CCMASK:
8035 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
8036 break;
8037 case SystemZISD::REPLICATE: {
8038 SDValue SrcOp = Op.getOperand(0);
8039 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
8040 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
8041 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
8042 break;
8043 }
8044 default:
8045 break;
8046 }
8047 }
8048
8049 // Known has the width of the source operand(s). Adjust if needed to match
8050 // the passed bitwidth.
8051 if (Known.getBitWidth() != BitWidth)
8052 Known = Known.anyextOrTrunc(BitWidth);
8053}
8054
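// Helper for ComputeNumSignBitsForTargetNode(): take the minimum number of
// sign bits across the two source operands of Op starting at OpNo, adjusting
// for element narrowing when the operands are wider than the result (PACK).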
8055static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
8056 const SelectionDAG &DAG, unsigned Depth,
8057 unsigned OpNo) {
8058 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
8059 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
8060 if (LHS == 1) return 1; // Early out.
8061 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
8062 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
8063 if (RHS == 1) return 1; // Early out.
8064 unsigned Common = std::min(LHS, RHS);
8065 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
8066 EVT VT = Op.getValueType();
8067 unsigned VTBits = VT.getScalarSizeInBits();
8068 if (SrcBitWidth > VTBits) { // PACK
8069 unsigned SrcExtraBits = SrcBitWidth - VTBits;
8070 if (Common > SrcExtraBits)
8071 return (Common - SrcExtraBits);
8072 return 1;
8073 }
8074 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
8075 return Common;
8076}
8077
8078unsigned
8079SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
8080 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
8081 unsigned Depth) const {
8082 if (Op.getResNo() != 0)
8083 return 1;
8084 unsigned Opcode = Op.getOpcode();
8085 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
8086 unsigned Id = Op.getConstantOperandVal(0);
8087 switch (Id) {
8088 case Intrinsic::s390_vpksh: // PACKS
8089 case Intrinsic::s390_vpksf:
8090 case Intrinsic::s390_vpksg:
8091 case Intrinsic::s390_vpkshs: // PACKS_CC
8092 case Intrinsic::s390_vpksfs:
8093 case Intrinsic::s390_vpksgs:
8094 case Intrinsic::s390_vpklsh: // PACKLS
8095 case Intrinsic::s390_vpklsf:
8096 case Intrinsic::s390_vpklsg:
8097 case Intrinsic::s390_vpklshs: // PACKLS_CC
8098 case Intrinsic::s390_vpklsfs:
8099 case Intrinsic::s390_vpklsgs:
8100 case Intrinsic::s390_vpdi:
8101 case Intrinsic::s390_vsldb:
8102 case Intrinsic::s390_vperm:
8103 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
8104 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8105 case Intrinsic::s390_vuphh:
8106 case Intrinsic::s390_vuphf:
8107 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8108 case Intrinsic::s390_vuplhw:
8109 case Intrinsic::s390_vuplf: {
8110 SDValue PackedOp = Op.getOperand(1);
8111 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
8112 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
8113 EVT VT = Op.getValueType();
8114 unsigned VTBits = VT.getScalarSizeInBits();
8115 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
8116 return Tmp;
8117 }
8118 default:
8119 break;
8120 }
8121 } else {
8122 switch (Opcode) {
8123 case SystemZISD::SELECT_CCMASK:
8124 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
8125 default:
8126 break;
8127 }
8128 }
8129
8130 return 1;
8131}
8132
8133bool SystemZTargetLowering::
8134isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op,
8135 const APInt &DemandedElts, const SelectionDAG &DAG,
8136 bool PoisonOnly, unsigned Depth) const {
8137 switch (Op->getOpcode()) {
8138 case SystemZISD::PCREL_WRAPPER:
8139 case SystemZISD::PCREL_OFFSET:
8140 return true;
8141 }
8142 return false;
8143}
8144
8145unsigned
8146SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
8147 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
8148 unsigned StackAlign = TFI->getStackAlignment();
8149 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
8150 "Unexpected stack alignment");
8151 // The default stack probe size is 4096 if the function has no
8152 // stack-probe-size attribute.
8153 unsigned StackProbeSize =
8154 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
8155 // Round down to the stack alignment.
8156 StackProbeSize &= ~(StackAlign - 1);
8157 return StackProbeSize ? StackProbeSize : StackAlign;
8158}
8159
8160//===----------------------------------------------------------------------===//
8161// Custom insertion
8162//===----------------------------------------------------------------------===//
8163
8164// Force base value Base into a register before MI. Return the register.
8165static Register forceReg(MachineInstr &MI, MachineOperand &Base,
8166 const SystemZInstrInfo *TII) {
8167 MachineBasicBlock *MBB = MI.getParent();
8168 MachineFunction &MF = *MBB->getParent();
8169 MachineRegisterInfo &MRI = MF.getRegInfo();
8170
8171 if (Base.isReg()) {
8172 // Copy Base into a new virtual register to help register coalescing in
8173 // cases with multiple uses.
8174 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8175 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
8176 .add(Base);
8177 return Reg;
8178 }
8179
8180 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8181 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
8182 .add(Base)
8183 .addImm(0)
8184 .addReg(0);
8185 return Reg;
8186}
8187
8188// The CC operand of MI might be missing a kill marker because there
8189// were multiple uses of CC, and ISel didn't know which to mark.
8190// Figure out whether MI should have had a kill marker.
8191static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
8192 // Scan forward through BB for a use/def of CC.
8193 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
8194 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
8195 const MachineInstr& mi = *miI;
8196 if (mi.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
8197 return false;
8198 if (mi.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
8199 break; // Should have kill-flag - update below.
8200 }
8201
8202 // If we hit the end of the block, check whether CC is live into a
8203 // successor.
8204 if (miI == MBB->end()) {
8205 for (const MachineBasicBlock *Succ : MBB->successors())
8206 if (Succ->isLiveIn(SystemZ::CC))
8207 return false;
8208 }
8209
8210 return true;
8211}
8212
8213// Return true if it is OK for this Select pseudo-opcode to be cascaded
8214// together with other Select pseudo-opcodes into a single basic-block with
8215// a conditional jump around it.
8216static bool isSelectPseudo(MachineInstr &MI) {
8217 switch (MI.getOpcode()) {
8218 case SystemZ::Select32:
8219 case SystemZ::Select64:
8220 case SystemZ::Select128:
8221 case SystemZ::SelectF32:
8222 case SystemZ::SelectF64:
8223 case SystemZ::SelectF128:
8224 case SystemZ::SelectVR32:
8225 case SystemZ::SelectVR64:
8226 case SystemZ::SelectVR128:
8227 return true;
8228
8229 default:
8230 return false;
8231 }
8232}
8233
8234// Helper function, which inserts PHI functions into SinkMBB:
8235// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
8236// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
8237static void createPHIsForSelects(SmallVectorImpl<MachineInstr*> &Selects,
8238 MachineBasicBlock *TrueMBB,
8239 MachineBasicBlock *FalseMBB,
8240 MachineBasicBlock *SinkMBB) {
8241 MachineFunction *MF = TrueMBB->getParent();
8242 const SystemZInstrInfo *TII = MF->getSubtarget<SystemZSubtarget>().getInstrInfo();
8243
8244 MachineInstr *FirstMI = Selects.front();
8245 unsigned CCValid = FirstMI->getOperand(3).getImm();
8246 unsigned CCMask = FirstMI->getOperand(4).getImm();
8247
8248 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
8249
8250 // As we are creating the PHIs, we have to be careful if there is more than
8251 // one. Later Selects may reference the results of earlier Selects, but later
8252 // PHIs have to reference the individual true/false inputs from earlier PHIs.
8253 // That also means that PHI construction must work forward from earlier to
8254 // later, and that the code must maintain a mapping from each earlier PHI's
8255 // destination register to the registers that went into that PHI.
8256 DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
8257
8258 for (auto *MI : Selects) {
8259 Register DestReg = MI->getOperand(0).getReg();
8260 Register TrueReg = MI->getOperand(1).getReg();
8261 Register FalseReg = MI->getOperand(2).getReg();
8262
8263 // If this Select we are generating is the opposite condition from
8264 // the jump we generated, then we have to swap the operands for the
8265 // PHI that is going to be generated.
8266 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
8267 std::swap(TrueReg, FalseReg);
8268
8269 if (RegRewriteTable.contains(TrueReg))
8270 TrueReg = RegRewriteTable[TrueReg].first;
8271
8272 if (RegRewriteTable.contains(FalseReg))
8273 FalseReg = RegRewriteTable[FalseReg].second;
8274
8275 DebugLoc DL = MI->getDebugLoc();
8276 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
8277 .addReg(TrueReg).addMBB(TrueMBB)
8278 .addReg(FalseReg).addMBB(FalseMBB);
8279
8280 // Add this PHI to the rewrite table.
8281 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
8282 }
8283
8284 MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
8285}
8286
8287MachineBasicBlock *
8288SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
8289 MachineBasicBlock *BB) const {
8290 MachineFunction &MF = *BB->getParent();
8291 MachineFrameInfo &MFI = MF.getFrameInfo();
8292 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
8293 assert(TFL->hasReservedCallFrame(MF) &&
8294 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
8295 (void)TFL;
8296 // Get the MaxCallFrameSize value and erase MI since it serves no further
8297 // purpose as the call frame is statically reserved in the prolog. Set
8298 // AdjustsStack as MI is *not* mapped as a frame instruction.
8299 uint32_t NumBytes = MI.getOperand(0).getImm();
8300 if (NumBytes > MFI.getMaxCallFrameSize())
8301 MFI.setMaxCallFrameSize(NumBytes);
8302 MFI.setAdjustsStack(true);
8303
8304 MI.eraseFromParent();
8305 return BB;
8306}
8307
8308// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
8309MachineBasicBlock *
8310SystemZTargetLowering::emitSelect(MachineInstr &MI,
8311 MachineBasicBlock *MBB) const {
8312 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
8313 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8314
8315 unsigned CCValid = MI.getOperand(3).getImm();
8316 unsigned CCMask = MI.getOperand(4).getImm();
8317
8318 // If we have a sequence of Select* pseudo instructions using the
8319 // same condition code value, we want to expand all of them into
8320 // a single pair of basic blocks using the same condition.
8321 SmallVector<MachineInstr*, 8> Selects;
8322 SmallVector<MachineInstr*, 8> DbgValues;
8323 Selects.push_back(&MI);
8324 unsigned Count = 0;
8325 for (MachineInstr &NextMI : llvm::make_range(
8326 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
8327 if (isSelectPseudo(NextMI)) {
8328 assert(NextMI.getOperand(3).getImm() == CCValid &&
8329 "Bad CCValid operands since CC was not redefined.");
8330 if (NextMI.getOperand(4).getImm() == CCMask ||
8331 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
8332 Selects.push_back(&NextMI);
8333 continue;
8334 }
8335 break;
8336 }
8337 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
8338 NextMI.usesCustomInsertionHook())
8339 break;
8340 bool User = false;
8341 for (auto *SelMI : Selects)
8342 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
8343 User = true;
8344 break;
8345 }
8346 if (NextMI.isDebugInstr()) {
8347 if (User) {
8348 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
8349 DbgValues.push_back(&NextMI);
8350 }
8351 } else if (User || ++Count > 20)
8352 break;
8353 }
8354
8355 MachineInstr *LastMI = Selects.back();
8356 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
8357 checkCCKill(*LastMI, MBB));
8358 MachineBasicBlock *StartMBB = MBB;
8359 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
8360 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8361
8362 // Unless CC was killed in the last Select instruction, mark it as
8363 // live-in to both FalseMBB and JoinMBB.
8364 if (!CCKilled) {
8365 FalseMBB->addLiveIn(SystemZ::CC);
8366 JoinMBB->addLiveIn(SystemZ::CC);
8367 }
8368
8369 // StartMBB:
8370 // BRC CCMask, JoinMBB
8371 // # fallthrough to FalseMBB
8372 MBB = StartMBB;
8373 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8374 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8375 MBB->addSuccessor(JoinMBB);
8376 MBB->addSuccessor(FalseMBB);
8377
8378 // FalseMBB:
8379 // # fallthrough to JoinMBB
8380 MBB = FalseMBB;
8381 MBB->addSuccessor(JoinMBB);
8382
8383 // JoinMBB:
8384 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
8385 // ...
8386 MBB = JoinMBB;
8387 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
8388 for (auto *SelMI : Selects)
8389 SelMI->eraseFromParent();
8390
8391 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
8392 for (auto *DbgMI : DbgValues)
8393 MBB->splice(InsertPos, StartMBB, DbgMI);
8394
8395 return JoinMBB;
8396}
8397
8398// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
8399// StoreOpcode is the store to use and Invert says whether the store should
8400// happen when the condition is false rather than true. If a STORE ON
8401// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
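// For example, CondStore32 is expanded with StoreOpcode == ST and
// STOCOpcode == STOC, while the floating-point variants pass STOCOpcode == 0
// and always use the branch-around sequence below.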
8402MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
8403 MachineBasicBlock *MBB,
8404 unsigned StoreOpcode,
8405 unsigned STOCOpcode,
8406 bool Invert) const {
8407 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8408
8409 Register SrcReg = MI.getOperand(0).getReg();
8410 MachineOperand Base = MI.getOperand(1);
8411 int64_t Disp = MI.getOperand(2).getImm();
8412 Register IndexReg = MI.getOperand(3).getReg();
8413 unsigned CCValid = MI.getOperand(4).getImm();
8414 unsigned CCMask = MI.getOperand(5).getImm();
8415 DebugLoc DL = MI.getDebugLoc();
8416
8417 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
8418
8419 // ISel pattern matching also adds a load memory operand of the same
8420 // address, so take special care to find the storing memory operand.
8421 MachineMemOperand *MMO = nullptr;
8422 for (auto *I : MI.memoperands())
8423 if (I->isStore()) {
8424 MMO = I;
8425 break;
8426 }
8427
8428 // Use STOCOpcode if possible. We could use different store patterns in
8429 // order to avoid matching the index register, but the performance trade-offs
8430 // might be more complicated in that case.
8431 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
8432 if (Invert)
8433 CCMask ^= CCValid;
8434
8435 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
8436 .addReg(SrcReg)
8437 .add(Base)
8438 .addImm(Disp)
8439 .addImm(CCValid)
8440 .addImm(CCMask)
8441 .addMemOperand(MMO);
8442
8443 MI.eraseFromParent();
8444 return MBB;
8445 }
8446
8447 // Get the condition needed to branch around the store.
8448 if (!Invert)
8449 CCMask ^= CCValid;
8450
8451 MachineBasicBlock *StartMBB = MBB;
8452 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
8453 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8454
8455 // Unless CC was killed in the CondStore instruction, mark it as
8456 // live-in to both FalseMBB and JoinMBB.
8457 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
8458 !checkCCKill(MI, JoinMBB)) {
8459 FalseMBB->addLiveIn(SystemZ::CC);
8460 JoinMBB->addLiveIn(SystemZ::CC);
8461 }
8462
8463 // StartMBB:
8464 // BRC CCMask, JoinMBB
8465 // # fallthrough to FalseMBB
8466 MBB = StartMBB;
8467 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8468 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8469 MBB->addSuccessor(JoinMBB);
8470 MBB->addSuccessor(FalseMBB);
8471
8472 // FalseMBB:
8473 // store %SrcReg, %Disp(%Index,%Base)
8474 // # fallthrough to JoinMBB
8475 MBB = FalseMBB;
8476 BuildMI(MBB, DL, TII->get(StoreOpcode))
8477 .addReg(SrcReg)
8478 .add(Base)
8479 .addImm(Disp)
8480 .addReg(IndexReg)
8481 .addMemOperand(MMO);
8482 MBB->addSuccessor(JoinMBB);
8483
8484 MI.eraseFromParent();
8485 return JoinMBB;
8486}
8487
8488// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
8489 MachineBasicBlock *
8490 SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
8491 MachineBasicBlock *MBB,
8492 bool Unsigned) const {
8493 MachineFunction &MF = *MBB->getParent();
8494 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8495 MachineRegisterInfo &MRI = MF.getRegInfo();
8496
8497 // Synthetic instruction to compare 128-bit values.
8498 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
8499 Register Op0 = MI.getOperand(0).getReg();
8500 Register Op1 = MI.getOperand(1).getReg();
8501
8502 MachineBasicBlock *StartMBB = MBB;
8503 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
8504 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
8505
8506 // StartMBB:
8507 //
8508 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
8509 // Swap the inputs to get:
8510 // CC 1 if high(Op0) > high(Op1)
8511 // CC 2 if high(Op0) < high(Op1)
8512 // CC 0 if high(Op0) == high(Op1)
8513 //
8514 // If CC != 0, we're done, so jump over the next instruction.
8515 //
8516 // VEC[L]G Op1, Op0
8517 // JNE JoinMBB
8518 // # fallthrough to HiEqMBB
8519 MBB = StartMBB;
8520 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
8521 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
8522 .addReg(Op1).addReg(Op0);
8523 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8524 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
8525 MBB->addSuccessor(JoinMBB);
8526 MBB->addSuccessor(HiEqMBB);
8527
8528 // HiEqMBB:
8529 //
8530 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
8531 // Since we already know the high parts are equal, the CC
8532 // result will only depend on the low parts:
8533 // CC 1 if low(Op0) > low(Op1)
8534 // CC 3 if low(Op0) <= low(Op1)
8535 //
8536 // VCHLGS Tmp, Op0, Op1
8537 // # fallthrough to JoinMBB
8538 MBB = HiEqMBB;
8539 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
8540 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
8541 .addReg(Op0).addReg(Op1);
8542 MBB->addSuccessor(JoinMBB);
8543
8544 // Mark CC as live-in to JoinMBB.
8545 JoinMBB->addLiveIn(SystemZ::CC);
8546
8547 MI.eraseFromParent();
8548 return JoinMBB;
8549}
8550
8551// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
8552// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
8553// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
8554// whether the field should be inverted after performing BinOpcode (e.g. for
8555// NAND).
8556MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
8557 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
8558 bool Invert) const {
8559 MachineFunction &MF = *MBB->getParent();
8560 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8561 MachineRegisterInfo &MRI = MF.getRegInfo();
8562
8563 // Extract the operands. Base can be a register or a frame index.
8564 // Src2 can be a register or immediate.
8565 Register Dest = MI.getOperand(0).getReg();
8566 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8567 int64_t Disp = MI.getOperand(2).getImm();
8568 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
8569 Register BitShift = MI.getOperand(4).getReg();
8570 Register NegBitShift = MI.getOperand(5).getReg();
8571 unsigned BitSize = MI.getOperand(6).getImm();
8572 DebugLoc DL = MI.getDebugLoc();
8573
8574 // Get the right opcodes for the displacement.
8575 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8576 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8577 assert(LOpcode && CSOpcode && "Displacement out of range");
8578
8579 // Create virtual registers for temporary results.
8580 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8581 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8582 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8583 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8584 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8585
8586 // Insert a basic block for the main loop.
8587 MachineBasicBlock *StartMBB = MBB;
8588 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8589 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8590
8591 // StartMBB:
8592 // ...
8593 // %OrigVal = L Disp(%Base)
8594 // # fall through to LoopMBB
8595 MBB = StartMBB;
8596 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8597 MBB->addSuccessor(LoopMBB);
8598
8599 // LoopMBB:
8600 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
8601 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8602 // %RotatedNewVal = OP %RotatedOldVal, %Src2
8603 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8604 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8605 // JNE LoopMBB
8606 // # fall through to DoneMBB
8607 MBB = LoopMBB;
8608 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8609 .addReg(OrigVal).addMBB(StartMBB)
8610 .addReg(Dest).addMBB(LoopMBB);
8611 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8612 .addReg(OldVal).addReg(BitShift).addImm(0);
8613 if (Invert) {
8614 // Perform the operation normally and then invert every bit of the field.
8615 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8616 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
8617 // XILF with the upper BitSize bits set.
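// (For example, with BitSize == 8 the immediate is 0xff000000, so only the
// rotated field held in the high bits of the word is inverted.)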
8618 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
8619 .addReg(Tmp).addImm(-1U << (32 - BitSize));
8620 } else if (BinOpcode)
8621 // A simple binary operation.
8622 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
8623 .addReg(RotatedOldVal)
8624 .add(Src2);
8625 else
8626 // Use RISBG to rotate Src2 into position and use it to replace the
8627 // field in RotatedOldVal.
8628 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
8629 .addReg(RotatedOldVal).addReg(Src2.getReg())
8630 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
8631 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8632 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8633 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8634 .addReg(OldVal)
8635 .addReg(NewVal)
8636 .add(Base)
8637 .addImm(Disp);
8638 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8639 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
8640 MBB->addSuccessor(LoopMBB);
8641 MBB->addSuccessor(DoneMBB);
8642
8643 MI.eraseFromParent();
8644 return DoneMBB;
8645}
8646
8647// Implement EmitInstrWithCustomInserter for subword pseudo
8648// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
8649// instruction that should be used to compare the current field with the
8650// minimum or maximum value. KeepOldMask is the BRC condition-code mask
8651// for when the current field should be kept.
8652MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
8653 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
8654 unsigned KeepOldMask) const {
8655 MachineFunction &MF = *MBB->getParent();
8656 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8657 MachineRegisterInfo &MRI = MF.getRegInfo();
8658
8659 // Extract the operands. Base can be a register or a frame index.
8660 Register Dest = MI.getOperand(0).getReg();
8661 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8662 int64_t Disp = MI.getOperand(2).getImm();
8663 Register Src2 = MI.getOperand(3).getReg();
8664 Register BitShift = MI.getOperand(4).getReg();
8665 Register NegBitShift = MI.getOperand(5).getReg();
8666 unsigned BitSize = MI.getOperand(6).getImm();
8667 DebugLoc DL = MI.getDebugLoc();
8668
8669 // Get the right opcodes for the displacement.
8670 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8671 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8672 assert(LOpcode && CSOpcode && "Displacement out of range");
8673
8674 // Create virtual registers for temporary results.
8675 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8676 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8677 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8678 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8679 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8680 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8681
8682 // Insert 3 basic blocks for the loop.
8683 MachineBasicBlock *StartMBB = MBB;
8684 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8685 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8686 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
8687 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
8688
8689 // StartMBB:
8690 // ...
8691 // %OrigVal = L Disp(%Base)
8692 // # fall through to LoopMBB
8693 MBB = StartMBB;
8694 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8695 MBB->addSuccessor(LoopMBB);
8696
8697 // LoopMBB:
8698 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
8699 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8700 // CompareOpcode %RotatedOldVal, %Src2
8701 // BRC KeepOldMask, UpdateMBB
8702 MBB = LoopMBB;
8703 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8704 .addReg(OrigVal).addMBB(StartMBB)
8705 .addReg(Dest).addMBB(UpdateMBB);
8706 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8707 .addReg(OldVal).addReg(BitShift).addImm(0);
8708 BuildMI(MBB, DL, TII->get(CompareOpcode))
8709 .addReg(RotatedOldVal).addReg(Src2);
8710 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8711 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
8712 MBB->addSuccessor(UpdateMBB);
8713 MBB->addSuccessor(UseAltMBB);
8714
8715 // UseAltMBB:
8716 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
8717 // # fall through to UpdateMBB
8718 MBB = UseAltMBB;
8719 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
8720 .addReg(RotatedOldVal).addReg(Src2)
8721 .addImm(32).addImm(31 + BitSize).addImm(0);
8722 MBB->addSuccessor(UpdateMBB);
8723
8724 // UpdateMBB:
8725 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
8726 // [ %RotatedAltVal, UseAltMBB ]
8727 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8728 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8729 // JNE LoopMBB
8730 // # fall through to DoneMBB
8731 MBB = UpdateMBB;
8732 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
8733 .addReg(RotatedOldVal).addMBB(LoopMBB)
8734 .addReg(RotatedAltVal).addMBB(UseAltMBB);
8735 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8736 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8737 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8738 .addReg(OldVal)
8739 .addReg(NewVal)
8740 .add(Base)
8741 .addImm(Disp);
8742 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8743 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
8744 MBB->addSuccessor(LoopMBB);
8745 MBB->addSuccessor(DoneMBB);
8746
8747 MI.eraseFromParent();
8748 return DoneMBB;
8749}
8750
8751// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
8752// instruction MI.
8753 MachineBasicBlock *
8754 SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
8755 MachineBasicBlock *MBB) const {
8756 MachineFunction &MF = *MBB->getParent();
8757 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8758 MachineRegisterInfo &MRI = MF.getRegInfo();
8759
8760 // Extract the operands. Base can be a register or a frame index.
8761 Register Dest = MI.getOperand(0).getReg();
8762 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8763 int64_t Disp = MI.getOperand(2).getImm();
8764 Register CmpVal = MI.getOperand(3).getReg();
8765 Register OrigSwapVal = MI.getOperand(4).getReg();
8766 Register BitShift = MI.getOperand(5).getReg();
8767 Register NegBitShift = MI.getOperand(6).getReg();
8768 int64_t BitSize = MI.getOperand(7).getImm();
8769 DebugLoc DL = MI.getDebugLoc();
8770
8771 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
8772
8773 // Get the right opcodes for the displacement and zero-extension.
8774 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8775 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8776 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
8777 assert(LOpcode && CSOpcode && "Displacement out of range");
8778
8779 // Create virtual registers for temporary results.
8780 Register OrigOldVal = MRI.createVirtualRegister(RC);
8781 Register OldVal = MRI.createVirtualRegister(RC);
8782 Register SwapVal = MRI.createVirtualRegister(RC);
8783 Register StoreVal = MRI.createVirtualRegister(RC);
8784 Register OldValRot = MRI.createVirtualRegister(RC);
8785 Register RetryOldVal = MRI.createVirtualRegister(RC);
8786 Register RetrySwapVal = MRI.createVirtualRegister(RC);
8787
8788 // Insert 2 basic blocks for the loop.
8789 MachineBasicBlock *StartMBB = MBB;
8790 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8791 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8792 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
8793
8794 // StartMBB:
8795 // ...
8796 // %OrigOldVal = L Disp(%Base)
8797 // # fall through to LoopMBB
8798 MBB = StartMBB;
8799 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
8800 .add(Base)
8801 .addImm(Disp)
8802 .addReg(0);
8803 MBB->addSuccessor(LoopMBB);
8804
8805 // LoopMBB:
8806 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
8807 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
8808 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
8809 // ^^ The low BitSize bits contain the field
8810 // of interest.
8811 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
8812 // ^^ Replace the upper 32-BitSize bits of the
8813 // swap value with those that we loaded and rotated.
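// (For BitSize == 8 this keeps only the low 8 bits of %SwapVal and takes
// the remaining 24 bits of the word from %OldValRot.)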
8814 // %Dest = LL[CH] %OldValRot
8815 // CR %Dest, %CmpVal
8816 // JNE DoneMBB
8817 // # Fall through to SetMBB
8818 MBB = LoopMBB;
8819 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8820 .addReg(OrigOldVal).addMBB(StartMBB)
8821 .addReg(RetryOldVal).addMBB(SetMBB);
8822 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
8823 .addReg(OrigSwapVal).addMBB(StartMBB)
8824 .addReg(RetrySwapVal).addMBB(SetMBB);
8825 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
8826 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
8827 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
8828 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
8829 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
8830 .addReg(OldValRot);
8831 BuildMI(MBB, DL, TII->get(SystemZ::CR))
8832 .addReg(Dest).addReg(CmpVal);
8833 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8834 .addImm(SystemZ::CCMASK_ICMP)
8835 .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
8836 MBB->addSuccessor(DoneMBB);
8837 MBB->addSuccessor(SetMBB);
8838
8839 // SetMBB:
8840 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
8841 // ^^ Rotate the new field to its proper position.
8842 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
8843 // JNE LoopMBB
8844 // # fall through to ExitMBB
8845 MBB = SetMBB;
8846 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
8847 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
8848 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
8849 .addReg(OldVal)
8850 .addReg(StoreVal)
8851 .add(Base)
8852 .addImm(Disp);
8853 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8854 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
8855 MBB->addSuccessor(LoopMBB);
8856 MBB->addSuccessor(DoneMBB);
8857
8858 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
8859 // to the block after the loop. At this point, CC may have been defined
8860 // either by the CR in LoopMBB or by the CS in SetMBB.
8861 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
8862 DoneMBB->addLiveIn(SystemZ::CC);
8863
8864 MI.eraseFromParent();
8865 return DoneMBB;
8866}
8867
8868// Emit a move from two GR64s to a GR128.
8869 MachineBasicBlock *
8870 SystemZTargetLowering::emitPair128(MachineInstr &MI,
8871 MachineBasicBlock *MBB) const {
8872 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8873 const DebugLoc &DL = MI.getDebugLoc();
8874
8875 Register Dest = MI.getOperand(0).getReg();
8876 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
8877 .add(MI.getOperand(1))
8878 .addImm(SystemZ::subreg_h64)
8879 .add(MI.getOperand(2))
8880 .addImm(SystemZ::subreg_l64);
8881 MI.eraseFromParent();
8882 return MBB;
8883}
8884
8885// Emit an extension from a GR64 to a GR128. ClearEven is true
8886// if the high register of the GR128 value must be cleared or false if
8887// it's "don't care".
8888MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
8889 MachineBasicBlock *MBB,
8890 bool ClearEven) const {
8891 MachineFunction &MF = *MBB->getParent();
8892 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8893 MachineRegisterInfo &MRI = MF.getRegInfo();
8894 DebugLoc DL = MI.getDebugLoc();
8895
8896 Register Dest = MI.getOperand(0).getReg();
8897 Register Src = MI.getOperand(1).getReg();
8898 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8899
8900 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
8901 if (ClearEven) {
8902 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8903 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8904
8905 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
8906 .addImm(0);
8907 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
8908 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
8909 In128 = NewIn128;
8910 }
8911 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8912 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
8913
8914 MI.eraseFromParent();
8915 return MBB;
8916}
8917
8918 MachineBasicBlock *
8919 SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
8920 MachineBasicBlock *MBB,
8921 unsigned Opcode, bool IsMemset) const {
8922 MachineFunction &MF = *MBB->getParent();
8923 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8924 MachineRegisterInfo &MRI = MF.getRegInfo();
8925 DebugLoc DL = MI.getDebugLoc();
8926
8927 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
8928 uint64_t DestDisp = MI.getOperand(1).getImm();
8929 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
8930 uint64_t SrcDisp;
8931
8932 // Fold the displacement Disp if it is out of range.
8933 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
8934 if (!isUInt<12>(Disp)) {
8935 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8936 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
8937 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
8938 .add(Base).addImm(Disp).addReg(0);
8939 Base = MachineOperand::CreateReg(Reg, false);
8940 Disp = 0;
8941 }
8942 };
8943
8944 if (!IsMemset) {
8945 SrcBase = earlyUseOperand(MI.getOperand(2));
8946 SrcDisp = MI.getOperand(3).getImm();
8947 } else {
8948 SrcBase = DestBase;
8949 SrcDisp = DestDisp++;
8950 foldDisplIfNeeded(DestBase, DestDisp);
8951 }
8952
8953 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
8954 bool IsImmForm = LengthMO.isImm();
8955 bool IsRegForm = !IsImmForm;
8956
8957 // Build and insert one Opcode of Length, with special treatment for memset.
8958 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
8959 MachineBasicBlock::iterator InsPos,
8960 MachineOperand DBase, uint64_t DDisp,
8961 MachineOperand SBase, uint64_t SDisp,
8962 unsigned Length) -> void {
8963 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
8964 if (IsMemset) {
8965 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
8966 if (ByteMO.isImm())
8967 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
8968 .add(SBase).addImm(SDisp).add(ByteMO);
8969 else
8970 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
8971 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
8972 if (--Length == 0)
8973 return;
8974 }
8975 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
8976 .add(DBase).addImm(DDisp).addImm(Length)
8977 .add(SBase).addImm(SDisp)
8978 .setMemRefs(MI.memoperands());
8979 };
8980
8981 bool NeedsLoop = false;
8982 uint64_t ImmLength = 0;
8983 Register LenAdjReg = SystemZ::NoRegister;
8984 if (IsImmForm) {
8985 ImmLength = LengthMO.getImm();
8986 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
8987 if (ImmLength == 0) {
8988 MI.eraseFromParent();
8989 return MBB;
8990 }
8991 if (Opcode == SystemZ::CLC) {
8992 if (ImmLength > 3 * 256)
8993 // A two-CLC sequence is a clear win over a loop, not least because
8994 // it needs only one branch. A three-CLC sequence needs the same
8995 // number of branches as a loop (i.e. 2), but is shorter. That
8996 // brings us to lengths greater than 768 bytes. It seems relatively
8997 // likely that a difference will be found within the first 768 bytes,
8998 // so we just optimize for the smallest number of branch
8999 // instructions, in order to avoid polluting the prediction buffer
9000 // too much.
9001 NeedsLoop = true;
9002 } else if (ImmLength > 6 * 256)
9003 // The heuristic we use is to prefer loops for anything that would
9004 // require 7 or more MVCs. With these kinds of sizes there isn't much
9005 // to choose between straight-line code and looping code, since the
9006 // time will be dominated by the MVCs themselves.
9007 NeedsLoop = true;
9008 } else {
9009 NeedsLoop = true;
9010 LenAdjReg = LengthMO.getReg();
9011 }
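// For example, with Opcode == MVC an immediate length of 1000 bytes stays
// straight-line (three 256-byte MVCs plus one 232-byte MVC), while anything
// above 1536 bytes takes the loop emitted below.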
9012
9013 // When generating more than one CLC, all but the last will need to
9014 // branch to the end when a difference is found.
9015 MachineBasicBlock *EndMBB =
9016 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
9017 ? SystemZ::splitBlockAfter(MI, MBB)
9018 : nullptr);
9019
9020 if (NeedsLoop) {
9021 Register StartCountReg =
9022 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
9023 if (IsImmForm) {
9024 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
9025 ImmLength &= 255;
9026 } else {
9027 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
9028 .addReg(LenAdjReg)
9029 .addReg(0)
9030 .addImm(8);
9031 }
9032
9033 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
9034 auto loadZeroAddress = [&]() -> MachineOperand {
9035 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9036 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
9037 return MachineOperand::CreateReg(Reg, false);
9038 };
9039 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
9040 DestBase = loadZeroAddress();
9041 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
9042 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
9043
9044 MachineBasicBlock *StartMBB = nullptr;
9045 MachineBasicBlock *LoopMBB = nullptr;
9046 MachineBasicBlock *NextMBB = nullptr;
9047 MachineBasicBlock *DoneMBB = nullptr;
9048 MachineBasicBlock *AllDoneMBB = nullptr;
9049
9050 Register StartSrcReg = forceReg(MI, SrcBase, TII);
9051 Register StartDestReg =
9052 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
9053
9054 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
9055 Register ThisSrcReg = MRI.createVirtualRegister(RC);
9056 Register ThisDestReg =
9057 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
9058 Register NextSrcReg = MRI.createVirtualRegister(RC);
9059 Register NextDestReg =
9060 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
9061 RC = &SystemZ::GR64BitRegClass;
9062 Register ThisCountReg = MRI.createVirtualRegister(RC);
9063 Register NextCountReg = MRI.createVirtualRegister(RC);
9064
9065 if (IsRegForm) {
9066 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9067 StartMBB = SystemZ::emitBlockAfter(MBB);
9068 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9069 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9070 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
9071
9072 // MBB:
9073 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
9074 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9075 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
9076 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9077 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
9078 .addMBB(AllDoneMBB);
9079 MBB->addSuccessor(AllDoneMBB);
9080 if (!IsMemset)
9081 MBB->addSuccessor(StartMBB);
9082 else {
9083 // MemsetOneCheckMBB:
9084 // # Jump to MemsetOneMBB for a memset of length 1, or
9085 // # fall thru to StartMBB.
9086 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
9087 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
9088 MBB->addSuccessor(MemsetOneCheckMBB);
9089 MBB = MemsetOneCheckMBB;
9090 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9091 .addReg(LenAdjReg).addImm(-1);
9092 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9093 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
9094 .addMBB(MemsetOneMBB);
9095 MBB->addSuccessor(MemsetOneMBB, {10, 100});
9096 MBB->addSuccessor(StartMBB, {90, 100});
9097
9098 // MemsetOneMBB:
9099 // # Jump back to AllDoneMBB after a single MVI or STC.
9100 MBB = MemsetOneMBB;
9101 insertMemMemOp(MBB, MBB->end(),
9102 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
9103 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
9104 1);
9105 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
9106 MBB->addSuccessor(AllDoneMBB);
9107 }
9108
9109 // StartMBB:
9110 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
9111 MBB = StartMBB;
9112 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9113 .addReg(StartCountReg).addImm(0);
9114 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9115 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
9116 .addMBB(DoneMBB);
9117 MBB->addSuccessor(DoneMBB);
9118 MBB->addSuccessor(LoopMBB);
9119 }
9120 else {
9121 StartMBB = MBB;
9122 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9123 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9124 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9125
9126 // StartMBB:
9127 // # fall through to LoopMBB
9128 MBB->addSuccessor(LoopMBB);
9129
9130 DestBase = MachineOperand::CreateReg(NextDestReg, false);
9131 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
9132 if (EndMBB && !ImmLength)
9133 // If the loop handled the whole CLC range, DoneMBB will be empty with
9134 // CC live-through into EndMBB, so add it as live-in.
9135 DoneMBB->addLiveIn(SystemZ::CC);
9136 }
9137
9138 // LoopMBB:
9139 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
9140 // [ %NextDestReg, NextMBB ]
9141 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
9142 // [ %NextSrcReg, NextMBB ]
9143 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
9144 // [ %NextCountReg, NextMBB ]
9145 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
9146 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
9147 // ( JLH EndMBB )
9148 //
9149 // The prefetch is used only for MVC. The JLH is used only for CLC.
9150 MBB = LoopMBB;
9151 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
9152 .addReg(StartDestReg).addMBB(StartMBB)
9153 .addReg(NextDestReg).addMBB(NextMBB);
9154 if (!HaveSingleBase)
9155 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
9156 .addReg(StartSrcReg).addMBB(StartMBB)
9157 .addReg(NextSrcReg).addMBB(NextMBB);
9158 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
9159 .addReg(StartCountReg).addMBB(StartMBB)
9160 .addReg(NextCountReg).addMBB(NextMBB);
9161 if (Opcode == SystemZ::MVC)
9162 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
9163 .addImm(SystemZ::PFD_WRITE)
9164 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
9165 insertMemMemOp(MBB, MBB->end(),
9166 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
9167 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
9168 if (EndMBB) {
9169 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9170 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
9171 .addMBB(EndMBB);
9172 MBB->addSuccessor(EndMBB);
9173 MBB->addSuccessor(NextMBB);
9174 }
9175
9176 // NextMBB:
9177 // %NextDestReg = LA 256(%ThisDestReg)
9178 // %NextSrcReg = LA 256(%ThisSrcReg)
9179 // %NextCountReg = AGHI %ThisCountReg, -1
9180 // CGHI %NextCountReg, 0
9181 // JLH LoopMBB
9182 // # fall through to DoneMBB
9183 //
9184 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
9185 MBB = NextMBB;
9186 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
9187 .addReg(ThisDestReg).addImm(256).addReg(0);
9188 if (!HaveSingleBase)
9189 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
9190 .addReg(ThisSrcReg).addImm(256).addReg(0);
9191 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
9192 .addReg(ThisCountReg).addImm(-1);
9193 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9194 .addReg(NextCountReg).addImm(0);
9195 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9196 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
9197 .addMBB(LoopMBB);
9198 MBB->addSuccessor(LoopMBB);
9199 MBB->addSuccessor(DoneMBB);
9200
9201 MBB = DoneMBB;
9202 if (IsRegForm) {
9203 // DoneMBB:
9204 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
9205 // # Use EXecute Relative Long for the remainder of the bytes. The target
9206 // instruction of the EXRL will have a length field of 1 since 0 is an
9207 // illegal value. The number of bytes processed becomes (%LenAdjReg &
9208 // 0xff) + 1.
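// For example, a 600-byte MVC in register form holds 599 in %LenAdjReg; the
// loop copies 512 bytes and the EXRL target then processes
// (599 & 0xff) + 1 == 88 bytes, exactly the remainder.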
9209 // # Fall through to AllDoneMBB.
9210 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9211 Register RemDestReg = HaveSingleBase ? RemSrcReg
9212 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9213 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
9214 .addReg(StartDestReg).addMBB(StartMBB)
9215 .addReg(NextDestReg).addMBB(NextMBB);
9216 if (!HaveSingleBase)
9217 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
9218 .addReg(StartSrcReg).addMBB(StartMBB)
9219 .addReg(NextSrcReg).addMBB(NextMBB);
9220 if (IsMemset)
9221 insertMemMemOp(MBB, MBB->end(),
9222 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
9223 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
9224 MachineInstrBuilder EXRL_MIB =
9225 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
9226 .addImm(Opcode)
9227 .addReg(LenAdjReg)
9228 .addReg(RemDestReg).addImm(DestDisp)
9229 .addReg(RemSrcReg).addImm(SrcDisp);
9230 MBB->addSuccessor(AllDoneMBB);
9231 MBB = AllDoneMBB;
9232 if (Opcode != SystemZ::MVC) {
9233 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
9234 if (EndMBB)
9235 MBB->addLiveIn(SystemZ::CC);
9236 }
9237 }
9239 }
9240
9241 // Handle any remaining bytes with straight-line code.
9242 while (ImmLength > 0) {
9243 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
9244 // The previous iteration might have created out-of-range displacements.
9245 // Apply them using LA/LAY if so.
9246 foldDisplIfNeeded(DestBase, DestDisp);
9247 foldDisplIfNeeded(SrcBase, SrcDisp);
9248 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
9249 DestDisp += ThisLength;
9250 SrcDisp += ThisLength;
9251 ImmLength -= ThisLength;
9252 // If there's another CLC to go, branch to the end if a difference
9253 // was found.
9254 if (EndMBB && ImmLength > 0) {
9255 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
9256 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9257 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
9258 .addMBB(EndMBB);
9259 MBB->addSuccessor(EndMBB);
9260 MBB->addSuccessor(NextMBB);
9261 MBB = NextMBB;
9262 }
9263 }
9264 if (EndMBB) {
9265 MBB->addSuccessor(EndMBB);
9266 MBB = EndMBB;
9267 MBB->addLiveIn(SystemZ::CC);
9268 }
9269
9270 MI.eraseFromParent();
9271 return MBB;
9272}
9273
9274// Decompose string pseudo-instruction MI into a loop that continually performs
9275// Opcode until CC != 3.
9276MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
9277 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9278 MachineFunction &MF = *MBB->getParent();
9279 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9280 MachineRegisterInfo &MRI = MF.getRegInfo();
9281 DebugLoc DL = MI.getDebugLoc();
9282
9283 uint64_t End1Reg = MI.getOperand(0).getReg();
9284 uint64_t Start1Reg = MI.getOperand(1).getReg();
9285 uint64_t Start2Reg = MI.getOperand(2).getReg();
9286 uint64_t CharReg = MI.getOperand(3).getReg();
9287
9288 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
9289 uint64_t This1Reg = MRI.createVirtualRegister(RC);
9290 uint64_t This2Reg = MRI.createVirtualRegister(RC);
9291 uint64_t End2Reg = MRI.createVirtualRegister(RC);
9292
9293 MachineBasicBlock *StartMBB = MBB;
9294 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9295 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9296
9297 // StartMBB:
9298 // # fall through to LoopMBB
9299 MBB->addSuccessor(LoopMBB);
9300
9301 // LoopMBB:
9302 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
9303 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
9304 // R0L = %CharReg
9305 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
9306 // JO LoopMBB
9307 // # fall through to DoneMBB
9308 //
9309 // The load of R0L can be hoisted by post-RA LICM.
9310 MBB = LoopMBB;
9311
9312 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
9313 .addReg(Start1Reg).addMBB(StartMBB)
9314 .addReg(End1Reg).addMBB(LoopMBB);
9315 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
9316 .addReg(Start2Reg).addMBB(StartMBB)
9317 .addReg(End2Reg).addMBB(LoopMBB);
9318 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
9319 BuildMI(MBB, DL, TII->get(Opcode))
9320 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
9321 .addReg(This1Reg).addReg(This2Reg);
9322 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9323 .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
9324 MBB->addSuccessor(LoopMBB);
9325 MBB->addSuccessor(DoneMBB);
9326
9327 DoneMBB->addLiveIn(SystemZ::CC);
9328
9329 MI.eraseFromParent();
9330 return DoneMBB;
9331}
9332
9333// Update TBEGIN instruction with final opcode and register clobbers.
9334MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
9335 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
9336 bool NoFloat) const {
9337 MachineFunction &MF = *MBB->getParent();
9338 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9339 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9340
9341 // Update opcode.
9342 MI.setDesc(TII->get(Opcode));
9343
9344 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
9345 // Make sure to add the corresponding GRSM bits if they are missing.
9346 uint64_t Control = MI.getOperand(2).getImm();
9347 static const unsigned GPRControlBit[16] = {
9348 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
9349 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
9350 };
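// Each GRSM bit covers an even/odd register pair, hence the duplicated
// entries; index 15 selects the %r14/%r15 pair (stack pointer %r15) and
// index 11 the %r10/%r11 pair (frame pointer %r11).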
9351 Control |= GPRControlBit[15];
9352 if (TFI->hasFP(MF))
9353 Control |= GPRControlBit[11];
9354 MI.getOperand(2).setImm(Control);
9355
9356 // Add GPR clobbers.
9357 for (int I = 0; I < 16; I++) {
9358 if ((Control & GPRControlBit[I]) == 0) {
9359 unsigned Reg = SystemZMC::GR64Regs[I];
9360 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9361 }
9362 }
9363
9364 // Add FPR/VR clobbers.
9365 if (!NoFloat && (Control & 4) != 0) {
9366 if (Subtarget.hasVector()) {
9367 for (unsigned Reg : SystemZMC::VR128Regs) {
9368 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9369 }
9370 } else {
9371 for (unsigned Reg : SystemZMC::FP64Regs) {
9372 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9373 }
9374 }
9375 }
9376
9377 return MBB;
9378}
9379
9380MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
9381 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9382 MachineFunction &MF = *MBB->getParent();
9383 MachineRegisterInfo *MRI = &MF.getRegInfo();
9384 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9385 DebugLoc DL = MI.getDebugLoc();
9386
9387 Register SrcReg = MI.getOperand(0).getReg();
9388
9389 // Create new virtual register of the same class as source.
9390 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
9391 Register DstReg = MRI->createVirtualRegister(RC);
9392
9393 // Replace pseudo with a normal load-and-test that models the def as
9394 // well.
9395 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
9396 .addReg(SrcReg)
9397 .setMIFlags(MI.getFlags());
9398 MI.eraseFromParent();
9399
9400 return MBB;
9401}
9402
9403MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
9404 MachineInstr &MI, MachineBasicBlock *MBB) const {
9405 MachineFunction &MF = *MBB->getParent();
9406 MachineRegisterInfo *MRI = &MF.getRegInfo();
9407 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9408 DebugLoc DL = MI.getDebugLoc();
9409 const unsigned ProbeSize = getStackProbeSize(MF);
9410 Register DstReg = MI.getOperand(0).getReg();
9411 Register SizeReg = MI.getOperand(2).getReg();
9412
9413 MachineBasicBlock *StartMBB = MBB;
9414 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9415 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
9416 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
9417 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
9418 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
9419
9420 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(
9421 MachinePointerInfo(), MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
9422
9423 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9424 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9425
9426 // LoopTestMBB
9427 // BRC TailTestMBB
9428 // # fallthrough to LoopBodyMBB
9429 StartMBB->addSuccessor(LoopTestMBB);
9430 MBB = LoopTestMBB;
9431 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
9432 .addReg(SizeReg)
9433 .addMBB(StartMBB)
9434 .addReg(IncReg)
9435 .addMBB(LoopBodyMBB);
9436 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
9437 .addReg(PHIReg)
9438 .addImm(ProbeSize);
9439 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9440 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
9441 .addMBB(TailTestMBB);
9442 MBB->addSuccessor(LoopBodyMBB);
9443 MBB->addSuccessor(TailTestMBB);
9444
9445 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
9446 // J LoopTestMBB
9447 MBB = LoopBodyMBB;
9448 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
9449 .addReg(PHIReg)
9450 .addImm(ProbeSize);
9451 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
9452 .addReg(SystemZ::R15D)
9453 .addImm(ProbeSize);
9454 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9455 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
9456 .setMemRefs(VolLdMMO);
9457 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
9458 MBB->addSuccessor(LoopTestMBB);
9459
9460 // TailTestMBB
9461 // BRC DoneMBB
9462 // # fallthrough to TailMBB
9463 MBB = TailTestMBB;
9464 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9465 .addReg(PHIReg)
9466 .addImm(0);
9467 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9468 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
9469 .addMBB(DoneMBB);
9470 MBB->addSuccessor(TailMBB);
9471 MBB->addSuccessor(DoneMBB);
9472
9473 // TailMBB
9474 // # fallthrough to DoneMBB
9475 MBB = TailMBB;
9476 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
9477 .addReg(SystemZ::R15D)
9478 .addReg(PHIReg);
9479 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9480 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
9481 .setMemRefs(VolLdMMO);
9482 MBB->addSuccessor(DoneMBB);
9483
9484 // DoneMBB
9485 MBB = DoneMBB;
9486 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
9487 .addReg(SystemZ::R15D);
9488
9489 MI.eraseFromParent();
9490 return DoneMBB;
9491}
9492
9493SDValue SystemZTargetLowering::
9494getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
9495 MachineFunction &MF = DAG.getMachineFunction();
9496 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
9497 SDLoc DL(SP);
9498 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
9499 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
9500}
9501
9502 MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
9503 MachineInstr &MI, MachineBasicBlock *MBB) const {
9504 switch (MI.getOpcode()) {
9505 case SystemZ::ADJCALLSTACKDOWN:
9506 case SystemZ::ADJCALLSTACKUP:
9507 return emitAdjCallStack(MI, MBB);
9508
9509 case SystemZ::Select32:
9510 case SystemZ::Select64:
9511 case SystemZ::Select128:
9512 case SystemZ::SelectF32:
9513 case SystemZ::SelectF64:
9514 case SystemZ::SelectF128:
9515 case SystemZ::SelectVR32:
9516 case SystemZ::SelectVR64:
9517 case SystemZ::SelectVR128:
9518 return emitSelect(MI, MBB);
9519
9520 case SystemZ::CondStore8Mux:
9521 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
9522 case SystemZ::CondStore8MuxInv:
9523 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
9524 case SystemZ::CondStore16Mux:
9525 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
9526 case SystemZ::CondStore16MuxInv:
9527 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
9528 case SystemZ::CondStore32Mux:
9529 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
9530 case SystemZ::CondStore32MuxInv:
9531 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
9532 case SystemZ::CondStore8:
9533 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
9534 case SystemZ::CondStore8Inv:
9535 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
9536 case SystemZ::CondStore16:
9537 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
9538 case SystemZ::CondStore16Inv:
9539 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
9540 case SystemZ::CondStore32:
9541 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
9542 case SystemZ::CondStore32Inv:
9543 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
9544 case SystemZ::CondStore64:
9545 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
9546 case SystemZ::CondStore64Inv:
9547 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
9548 case SystemZ::CondStoreF32:
9549 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
9550 case SystemZ::CondStoreF32Inv:
9551 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
9552 case SystemZ::CondStoreF64:
9553 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
9554 case SystemZ::CondStoreF64Inv:
9555 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
9556
9557 case SystemZ::SCmp128Hi:
9558 return emitICmp128Hi(MI, MBB, false);
9559 case SystemZ::UCmp128Hi:
9560 return emitICmp128Hi(MI, MBB, true);
9561
9562 case SystemZ::PAIR128:
9563 return emitPair128(MI, MBB);
9564 case SystemZ::AEXT128:
9565 return emitExt128(MI, MBB, false);
9566 case SystemZ::ZEXT128:
9567 return emitExt128(MI, MBB, true);
9568
9569 case SystemZ::ATOMIC_SWAPW:
9570 return emitAtomicLoadBinary(MI, MBB, 0);
9571
9572 case SystemZ::ATOMIC_LOADW_AR:
9573 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
9574 case SystemZ::ATOMIC_LOADW_AFI:
9575 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
9576
9577 case SystemZ::ATOMIC_LOADW_SR:
9578 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
9579
9580 case SystemZ::ATOMIC_LOADW_NR:
9581 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
9582 case SystemZ::ATOMIC_LOADW_NILH:
9583 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
9584
9585 case SystemZ::ATOMIC_LOADW_OR:
9586 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
9587 case SystemZ::ATOMIC_LOADW_OILH:
9588 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
9589
9590 case SystemZ::ATOMIC_LOADW_XR:
9591 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
9592 case SystemZ::ATOMIC_LOADW_XILF:
9593 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
9594
9595 case SystemZ::ATOMIC_LOADW_NRi:
9596 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
9597 case SystemZ::ATOMIC_LOADW_NILHi:
9598 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
9599
9600 case SystemZ::ATOMIC_LOADW_MIN:
9601 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
9602 case SystemZ::ATOMIC_LOADW_MAX:
9603 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
9604 case SystemZ::ATOMIC_LOADW_UMIN:
9605 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
9606 case SystemZ::ATOMIC_LOADW_UMAX:
9607 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
9608
9609 case SystemZ::ATOMIC_CMP_SWAPW:
9610 return emitAtomicCmpSwapW(MI, MBB);
9611 case SystemZ::MVCImm:
9612 case SystemZ::MVCReg:
9613 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
9614 case SystemZ::NCImm:
9615 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
9616 case SystemZ::OCImm:
9617 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
9618 case SystemZ::XCImm:
9619 case SystemZ::XCReg:
9620 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
9621 case SystemZ::CLCImm:
9622 case SystemZ::CLCReg:
9623 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
9624 case SystemZ::MemsetImmImm:
9625 case SystemZ::MemsetImmReg:
9626 case SystemZ::MemsetRegImm:
9627 case SystemZ::MemsetRegReg:
9628 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
9629 case SystemZ::CLSTLoop:
9630 return emitStringWrapper(MI, MBB, SystemZ::CLST);
9631 case SystemZ::MVSTLoop:
9632 return emitStringWrapper(MI, MBB, SystemZ::MVST);
9633 case SystemZ::SRSTLoop:
9634 return emitStringWrapper(MI, MBB, SystemZ::SRST);
9635 case SystemZ::TBEGIN:
9636 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
9637 case SystemZ::TBEGIN_nofloat:
9638 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
9639 case SystemZ::TBEGINC:
9640 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
9641 case SystemZ::LTEBRCompare_Pseudo:
9642 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
9643 case SystemZ::LTDBRCompare_Pseudo:
9644 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
9645 case SystemZ::LTXBRCompare_Pseudo:
9646 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
9647
9648 case SystemZ::PROBED_ALLOCA:
9649 return emitProbedAlloca(MI, MBB);
9650
9651 case TargetOpcode::STACKMAP:
9652 case TargetOpcode::PATCHPOINT:
9653 return emitPatchPoint(MI, MBB);
9654
9655 default:
9656 llvm_unreachable("Unexpected instr type to insert");
9657 }
9658}
9659
9660 // This is only used by the isel schedulers, and is needed only to prevent
9661 // the compiler from crashing when list-ilp is used.
9662const TargetRegisterClass *
9663SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
9664 if (VT == MVT::Untyped)
9665 return &SystemZ::ADDR128BitRegClass;
9666 return TargetLowering::getRepRegClassFor(VT);
9667}
9668
9669SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
9670 SelectionDAG &DAG) const {
9671 SDLoc dl(Op);
9672 /*
9673 The rounding method is in FPC Byte 3 bits 6-7, and has the following
9674 settings:
9675 00 Round to nearest
9676 01 Round to 0
9677 10 Round to +inf
9678 11 Round to -inf
9679
9680 FLT_ROUNDS, on the other hand, expects the following:
9681 -1 Undefined
9682 0 Round to 0
9683 1 Round to nearest
9684 2 Round to +inf
9685 3 Round to -inf
9686 */
9687
9688 // Save FPC to register.
9689 SDValue Chain = Op.getOperand(0);
9690 SDValue EFPC(
9691 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
9692 Chain = EFPC.getValue(1);
9693
9694 // Transform as necessary
9695 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
9696 DAG.getConstant(3, dl, MVT::i32));
9697 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
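// Checking each FPC value: 0 -> (0^0)^1 = 1 (nearest), 1 -> (1^0)^1 = 0
// (toward zero), 2 -> (2^1)^1 = 2 (+inf), 3 -> (3^1)^1 = 3 (-inf).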
9698 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
9699 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
9700 DAG.getConstant(1, dl, MVT::i32)));
9701
9702 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
9703 DAG.getConstant(1, dl, MVT::i32));
9704 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
9705
9706 return DAG.getMergeValues({RetVal, Chain}, dl);
9707}
9708
9709SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
9710 SelectionDAG &DAG) const {
9711 EVT VT = Op.getValueType();
9712 Op = Op.getOperand(0);
9713 EVT OpVT = Op.getValueType();
9714
9715 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
9716
9717 SDLoc DL(Op);
9718
9719 // load a 0 vector for the third operand of VSUM.
9720 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
9721
9722 // execute VSUM.
9723 switch (OpVT.getScalarSizeInBits()) {
9724 case 8:
9725 case 16:
9726 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
9727 [[fallthrough]];
9728 case 32:
9729 case 64:
9730 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
9731 DAG.getBitcast(Op.getValueType(), Zero));
9732 break;
9733 case 128:
9734 break; // VSUM over v1i128 should not happen and would be a noop
9735 default:
9736 llvm_unreachable("Unexpected scalar size.");
9737 }
9738 // Cast to original vector type, retrieve last element.
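// (SystemZ vectors are big-endian, so the low-order part of the i128 sum,
// which holds the reduced value truncated to the element type, ends up in
// the highest-numbered element after the bitcast.)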
9739 return DAG.getNode(
9740 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
9741 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
9742}
unsigned const MachineRegisterInfo * MRI
#define Success
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
amdgpu AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
Function Alias Analysis Results
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
IRTranslator LLVM IR MI
#define RegName(no)
lazy value info
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
uint64_t High
uint64_t IntrinsicInst * II
#define P(N)
const char LLVMTargetMachineRef TM
static bool isSelectPseudo(MachineInstr &MI)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
#define OPCODE(NAME)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:77
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1499
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1365
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1471
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the bit at the position given by "BitPosition" to 1.
Definition: APInt.h:1309
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:237
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1447
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition: APInt.h:301
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1236
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:837
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:830
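A minimal, self-contained sketch (not taken from this file) exercising a few of the APInt operations listed above; the bit positions and widths are arbitrary and chosen only for illustration.
#include "llvm/ADT/APInt.h"
#include <cassert>

using namespace llvm;

int main() {
  // Bits [8, 16) set in a 64-bit value.
  APInt Mask = APInt::getBitsSet(/*numBits=*/64, /*loBit=*/8, /*hiBit=*/16);
  APInt Val(64, 0);
  Val.setBit(12);                      // turn on bit 12
  assert(Val.isSubsetOf(Mask));        // every set bit of Val is also set in Mask

  APInt Wide = Val.zext(128);          // zero-extend to 128 bits
  APInt Narrow = Wide.trunc(32);       // truncate back down to 32 bits
  Narrow.lshrInPlace(12);              // logical right shift in place
  assert(Narrow.getZExtValue() == 1);  // bit 12 is now bit 0
  return 0;
}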
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:695
@ Add
*p = old + v
Definition: Instructions.h:711
@ Or
*p = old | v
Definition: Instructions.h:719
@ Sub
*p = old - v
Definition: Instructions.h:713
@ And
*p = old & v
Definition: Instructions.h:715
@ Xor
*p = old ^ v
Definition: Instructions.h:721
BinOp getOperation() const
Definition: Instructions.h:786
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:391
The address of a basic block.
Definition: Constants.h:890
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
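A sketch of the usual pattern for walking the CCValAssign results produced by a CCState analysis such as AnalyzeFormalArguments; the helper name walkArgLocs is illustrative only and does not appear in this file.
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/Register.h"

using namespace llvm;

void walkArgLocs(const SmallVectorImpl<CCValAssign> &ArgLocs) {
  for (const CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc()) {
      Register Reg = VA.getLocReg();          // argument assigned to a register
      (void)Reg;
    } else if (VA.isMemLoc()) {
      int64_t Offset = VA.getLocMemOffset();  // argument lives on the stack
      (void)Offset;
    }
  }
}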
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:716
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:728
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:274
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:690
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:582
bool hasPrivateLinkage() const
Definition: GlobalValue.h:527
bool hasInternalLinkage() const
Definition: GlobalValue.h:526
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:173
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
unsigned getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setMaxCallFrameSize(unsigned S)
MachineFunctionProperties & reset(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
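A sketch of how the MachineInstrBuilder methods above are typically chained off BuildMI (documented further below); the opcode, registers, and insertion point are placeholders, not SystemZ specifics.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"

using namespace llvm;

void emitExample(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
                 const DebugLoc &DL, const TargetInstrInfo &TII,
                 unsigned Opcode, Register Dst, Register Src,
                 MachineBasicBlock *Target) {
  BuildMI(MBB, InsertPt, DL, TII.get(Opcode), Dst)
      .addReg(Src)      // register operand
      .addImm(0)        // immediate operand
      .addMBB(Target);  // basic-block operand, e.g. a branch target
}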
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getMachineOpcode() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:227
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:736
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:492
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:746
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:842
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:486
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:876
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:487
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:787
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:690
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:782
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:481
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:813
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:859
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:499
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:753
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:568
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
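A sketch (not part of this file) showing how SelectionDAG nodes are composed with getZExtOrTrunc, getConstant, and getNode; buildIncrement is a hypothetical helper and VT is assumed to be a legal integer type.
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Hypothetical helper: return zext_or_trunc(X to VT) + 1.
SDValue buildIncrement(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue X) {
  SDValue Val = DAG.getZExtOrTrunc(X, DL, VT);     // adjust X to the integer type VT
  SDValue One = DAG.getConstant(1, DL, VT);        // constant 1 of type VT
  return DAG.getNode(ISD::ADD, DL, VT, Val, One);  // Val + 1
}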
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void reserve(size_type N)
Definition: SmallVector.h:676
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:289
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:463
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:258
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:677
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
iterator end() const
Definition: StringRef.h:113
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
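A small self-contained sketch of the StringRef and StringSwitch entries above; the constraint string and parsing steps are illustrative only, not the constraint handling performed in this file.
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include <cassert>

using namespace llvm;

int main() {
  StringRef Constraint = "{r5}";
  assert(Constraint.starts_with("{"));

  // Strip "{r" and the trailing "}", then parse the register number.
  StringRef Num = Constraint.slice(2, Constraint.size() - 1);
  unsigned Index = 0;
  bool Error = Num.getAsInteger(10, Index); // returns true on failure
  assert(!Error && Index == 5);

  // Map a constraint letter to a description with StringSwitch.
  const char *Kind = StringSwitch<const char *>("r")
                         .Case("r", "general register")
                         .Case("f", "floating-point register")
                         .Default("unknown");
  assert(StringRef(Kind) == "general register");
  return 0;
}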
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
A SystemZ-specific class detailing special use registers particular for calling conventions.
A SystemZ-specific constant pool value.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const
const TargetFrameLowering * getFrameLowering() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
const SystemZRegisterInfo * getRegisterInfo() const override
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
bool useSoftFloat() const override
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal add immediate, that is, the target has add instruction...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers Particular to z/OS when in 64 bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
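A sketch of how a hypothetical target's TargetLowering constructor would drive the configuration hooks listed above; ExampleTargetLowering and the chosen types and actions are placeholders, not the SystemZ configuration.
#include "llvm/CodeGen/TargetLowering.h"

using namespace llvm;

namespace {
class ExampleTargetLowering : public TargetLowering {
public:
  explicit ExampleTargetLowering(const TargetMachine &TM) : TargetLowering(TM) {
    // Expand an operation the (hypothetical) target lacks.
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
    // Mark an extending load and a truncating store as natively supported.
    setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i8, Legal);
    setTruncStoreAction(MVT::i64, MVT::i8, Legal);
    // Boolean results are materialized as 0 or 1.
    setBooleanContents(ZeroOrOneBooleanContent);
    // Request PerformDAGCombine callbacks for these node kinds.
    setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND});
  }
};
} // end anonymous namespace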
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:199
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:764
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1147
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1143
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:737
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:484
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1290
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:728
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1176
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1292
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1262
@ STRICT_FCEIL
Definition: ISDOpcodes.h:434
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1293
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1052
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:797
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:491
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
Definition: ISDOpcodes.h:1275
@ STRICT_FMINIMUM
Definition: ISDOpcodes.h:444
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:804
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:702
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1249
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1254
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:834
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:485
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1288
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:917
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1289
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1431
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:788
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:458
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:628
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1242
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1009
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:944
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1098
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1291
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1077
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:515
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:741
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1258
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:635
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1172
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:438
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1385
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:894
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:659
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:719
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:608
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1286
@ STRICT_FMAXIMUM
Definition: ISDOpcodes.h:443
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:432
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:543
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:794
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:433
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:756
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1294
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:986
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1062
@ ConstantPool
Definition: ISDOpcodes.h:82
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:823
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:812
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:902
@ STRICT_FROUND
Definition: ISDOpcodes.h:436
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:750
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:457
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:435
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1284
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:451
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:473
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:450
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1005
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1285
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:850
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1203
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:478
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:694
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1229
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:532
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1283
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:883
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:845
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:431
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:800
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1091
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:777
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:501
@ AssertZext
Definition: ISDOpcodes.h:62
@ STRICT_FRINT
Definition: ISDOpcodes.h:430
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1341
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:523
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1554
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1534
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
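A small sketch relating the ISD::CondCode helpers above; the specific condition codes are chosen only for illustration, and linking against LLVM's CodeGen library is assumed.
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>

using namespace llvm;

int main() {
  EVT VT = MVT::i32;
  // !(X < Y) is (X >= Y) for integer compares.
  assert(ISD::getSetCCInverse(ISD::SETLT, VT) == ISD::SETGE);
  // (Y > X) is the operand-swapped form of (X < Y).
  assert(ISD::getSetCCSwappedOperands(ISD::SETLT) == ISD::SETGT);
  return 0;
}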
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition: SystemZ.h:40
static bool isImmHH(uint64_t Val)
Definition: SystemZ.h:176
const unsigned CCMASK_TEND
Definition: SystemZ.h:97
const unsigned CCMASK_CS_EQ
Definition: SystemZ.h:67
const unsigned CCMASK_TBEGIN
Definition: SystemZ.h:92
const unsigned CCMASK_0
Definition: SystemZ.h:27
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition: SystemZ.h:82
const unsigned CCMASK_LOGICAL_CARRY
Definition: SystemZ.h:60
const unsigned TDCMASK_NORMAL_MINUS
Definition: SystemZ.h:122
const unsigned CCMASK_TDC
Definition: SystemZ.h:109
const unsigned CCMASK_FCMP
Definition: SystemZ.h:48
const unsigned CCMASK_TM_SOME_0
Definition: SystemZ.h:81
static bool isImmHL(uint64_t Val)
Definition: SystemZ.h:171
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition: SystemZ.h:124
const unsigned PFD_READ
Definition: SystemZ.h:115
const unsigned CCMASK_1
Definition: SystemZ.h:28
const unsigned TDCMASK_NORMAL_PLUS
Definition: SystemZ.h:121
const unsigned PFD_WRITE
Definition: SystemZ.h:116
const unsigned CCMASK_CMP_GT
Definition: SystemZ.h:37
const unsigned TDCMASK_QNAN_MINUS
Definition: SystemZ.h:128
const unsigned CCMASK_CS
Definition: SystemZ.h:69
const unsigned CCMASK_ANY
Definition: SystemZ.h:31
const unsigned CCMASK_ARITH
Definition: SystemZ.h:55
const unsigned CCMASK_TM_MIXED_MSB_0
Definition: SystemZ.h:78
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition: SystemZ.h:123
static bool isImmLL(uint64_t Val)
Definition: SystemZ.h:161
const unsigned VectorBits
Definition: SystemZ.h:154
static bool isImmLH(uint64_t Val)
Definition: SystemZ.h:166
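A hedged sketch of how these halfword predicates read, under the assumption that each isImmXX tests that every bit of Val outside its 16-bit field is zero (LL = bits 0-15, LH = bits 16-31, HL = bits 32-47, HH = bits 48-63):

  // Example constant with only bits 16-31 populated.
  uint64_t Val = 0x00000000ABCD0000ULL;
  bool LH = llvm::SystemZ::isImmLH(Val);  // true under the assumption above
  bool LL = llvm::SystemZ::isImmLL(Val);  // false: bits outside 0-15 are set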
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition: SystemZ.h:125
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition: SystemZ.h:77
const unsigned IPM_CC
Definition: SystemZ.h:112
const unsigned CCMASK_CMP_LE
Definition: SystemZ.h:39
const unsigned CCMASK_CMP_O
Definition: SystemZ.h:44
const unsigned CCMASK_CMP_EQ
Definition: SystemZ.h:35
const unsigned VectorBytes
Definition: SystemZ.h:158
const unsigned TDCMASK_INFINITY_MINUS
Definition: SystemZ.h:126
const unsigned CCMASK_ICMP
Definition: SystemZ.h:47
const unsigned CCMASK_VCMP_ALL
Definition: SystemZ.h:101
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition: SystemZ.h:104
const unsigned CCMASK_TM_MIXED_MSB_1
Definition: SystemZ.h:79
const unsigned CCMASK_TM_MSB_0
Definition: SystemZ.h:83
const unsigned CCMASK_ARITH_OVERFLOW
Definition: SystemZ.h:54
const unsigned CCMASK_CS_NE
Definition: SystemZ.h:68
const unsigned TDCMASK_SNAN_PLUS
Definition: SystemZ.h:129
const unsigned CCMASK_TM
Definition: SystemZ.h:85
const unsigned CCMASK_3
Definition: SystemZ.h:30
const unsigned CCMASK_CMP_LT
Definition: SystemZ.h:36
const unsigned CCMASK_CMP_NE
Definition: SystemZ.h:38
const unsigned TDCMASK_ZERO_PLUS
Definition: SystemZ.h:119
const unsigned TDCMASK_QNAN_PLUS
Definition: SystemZ.h:127
const unsigned TDCMASK_ZERO_MINUS
Definition: SystemZ.h:120
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition: SystemZ.h:80
const unsigned CCMASK_LOGICAL_BORROW
Definition: SystemZ.h:62
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition: SystemZ.h:43
const unsigned CCMASK_LOGICAL
Definition: SystemZ.h:64
const unsigned CCMASK_TM_MSB_1
Definition: SystemZ.h:84
const unsigned TDCMASK_SNAN_MINUS
Definition: SystemZ.h:130
@ GeneralDynamic
Definition: CodeGen.h:46
@ GS
Definition: X86.h:205
Reg
All possible values of the reg field in the ModR/M byte.
support::ulittle32_t Word
Definition: IRSymtab.h:52
NodeAddr< CodeNode * > Code
Definition: RDFGraph.h:388
constexpr const char32_t SBase
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceiling of the log base 2 of the specified value, or 32 if the value is zero.
Definition: MathExtras.h:337
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
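A minimal sketch of the BuildMI builder pattern; it uses the MachineBasicBlock overload (inserting before an existing instruction) rather than the MachineFunction overload listed here, and assumes TII, MBB, MI, DL, DestReg and SrcReg are already in scope:

  // Emit "DestReg = COPY SrcReg" immediately before MI in MBB.
  llvm::BuildMI(*MBB, MI, DL, TII->get(llvm::TargetOpcode::COPY), DestReg)
      .addReg(SrcReg);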
@ Done
Definition: Threading.h:61
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
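For example (a trivial sketch; the container is arbitrary):

  llvm::SmallVector<int, 8> V = {1, 2, 3, 4};
  // Iterate a sub-range that skips the first element: prints 2 3 4.
  for (int X : llvm::make_range(V.begin() + 1, V.end()))
    llvm::dbgs() << X << ' ';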
void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert ‘Bytes’ to a hex string and output to ‘OS’.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
int countl_zero(T Val)
Count the number of 0s from the most significant bit to the least significant, stopping at the first 1.
Definition: bit.h:281
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
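A small self-contained sketch exercising several of the bit utilities listed above (the constants are arbitrary):

  #include "llvm/ADT/bit.h"
  #include "llvm/Support/MathExtras.h"

  static_assert(llvm::isPowerOf2_32(64), "64 is a power of two");
  unsigned Ceil = llvm::bit_ceil(20u);  // 32: next power of two >= 20
  int TZ = llvm::countr_zero(40u);      // 3: 40 = 0b101000
  int LZ = llvm::countl_zero(40u);      // 26 for a 32-bit value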
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ Mul
Product of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:509
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
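Likewise for SignExtend64 and bit_floor (again a sketch with example constants):

  int64_t D = llvm::SignExtend64<20>(0xFFFFF);  // all 20 bits set, extends to -1
  unsigned F = llvm::bit_floor(20u);            // 16: largest power of two <= 20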
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define EQ(a, b)
Definition: regexec.c:112
#define NC
Definition: regutils.h:42
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
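The EVT queries above can be exercised with nothing more than an LLVMContext; a minimal sketch (the asserts only document the expected answers):

  #include "llvm/CodeGen/ValueTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include <cassert>

  llvm::LLVMContext Ctx;
  llvm::EVT VecVT = llvm::EVT::getVectorVT(Ctx, llvm::MVT::i32, 4);  // v4i32
  assert(VecVT.isVector() && VecVT.isInteger());
  assert(VecVT.getVectorNumElements() == 4);
  assert(VecVT.getVectorElementType() == llvm::EVT(llvm::MVT::i32));
  assert(VecVT.getFixedSizeInBits() == 128 && VecVT.isRound());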
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:175
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:161
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:300
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:169
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:134
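A short KnownBits sketch tying several of these queries together (the bit pattern is arbitrary):

  #include "llvm/Support/KnownBits.h"
  #include <cassert>

  llvm::KnownBits Known(8);               // 8-bit value, nothing known yet
  Known.Zero.setBit(7);                   // the top bit is now known to be 0
  llvm::KnownBits Wide = Known.zext(16);  // zero extension keeps bit 7 known 0
  assert(Wide.getBitWidth() == 16);
  assert(Wide.getMaxValue() == 127);      // top nine bits are known zero
  Known.resetAll();                       // discard all known-bits information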
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
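A hedged sketch of how lowering code typically constructs these records; the helper names here are hypothetical, and the MachineFunction is assumed to come from the surrounding code:

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineMemOperand.h"

  // Pointer info for a known spill slot (frame index) and for the constant pool.
  llvm::MachinePointerInfo describeSlot(llvm::MachineFunction &MF, int FrameIdx) {
    return llvm::MachinePointerInfo::getFixedStack(MF, FrameIdx);
  }
  llvm::MachinePointerInfo describePool(llvm::MachineFunction &MF) {
    return llvm::MachinePointerInfo::getConstantPool(MF);
  }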
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
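These setters are normally chained when a target's call-lowering code hands an outgoing call or libcall to LowerCallTo. A minimal sketch, assuming DAG, DL, Chain, Callee, RetTy, Args, a SignExtend flag, and a TargetLowering reference TLI are already in scope (none of this is taken verbatim from this file):

  llvm::TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setCallee(llvm::CallingConv::C, RetTy, Callee, std::move(Args))
      .setSExtResult(SignExtend)
      .setZExtResult(!SignExtend);
  // Result.first is the call's return value, Result.second the output chain.
  std::pair<llvm::SDValue, llvm::SDValue> Result = TLI.LowerCallTo(CLI);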