SystemZISelLowering.cpp
1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
23#include "llvm/IR/GlobalAlias.h"
25#include "llvm/IR/Intrinsics.h"
26#include "llvm/IR/IntrinsicsS390.h"
30#include <cctype>
31#include <optional>
32
33using namespace llvm;
34
35#define DEBUG_TYPE "systemz-lower"
36
37namespace {
38// Represents information about a comparison.
39struct Comparison {
40 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
41 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
42 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
43
44 // The operands to the comparison.
45 SDValue Op0, Op1;
46
47 // Chain if this is a strict floating-point comparison.
48 SDValue Chain;
49
50 // The opcode that should be used to compare Op0 and Op1.
51 unsigned Opcode;
52
53 // A SystemZICMP value. Only used for integer comparisons.
54 unsigned ICmpType;
55
56 // The mask of CC values that Opcode can produce.
57 unsigned CCValid;
58
59 // The mask of CC values for which the original condition is true.
60 unsigned CCMask;
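  // For example, an integer compare sets CC to 0 (equal), 1 (less) or
  // 2 (greater), so CCValid would cover those three values while an
  // equality test would have a CCMask containing only the bit for CC 0.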
61};
62} // end anonymous namespace
63
64// Classify VT as either 32 or 64 bit.
65static bool is32Bit(EVT VT) {
66 switch (VT.getSimpleVT().SimpleTy) {
67 case MVT::i32:
68 return true;
69 case MVT::i64:
70 return false;
71 default:
72 llvm_unreachable("Unsupported type");
73 }
74}
75
76// Return a version of MachineOperand that can be safely used before the
77// final use.
78static MachineOperand earlyUseOperand(MachineOperand Op) {
79 if (Op.isReg())
80 Op.setIsKill(false);
81 return Op;
82}
83
84SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
85                                             const SystemZSubtarget &STI)
86 : TargetLowering(TM), Subtarget(STI) {
87 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
88
89 auto *Regs = STI.getSpecialRegisters();
90
91 // Set up the register classes.
92 if (Subtarget.hasHighWord())
93 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
94 else
95 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
96 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
97 if (!useSoftFloat()) {
98 if (Subtarget.hasVector()) {
99 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
100 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
101 } else {
102 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
103 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
104 }
105 if (Subtarget.hasVectorEnhancements1())
106 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
107 else
108 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
109
110 if (Subtarget.hasVector()) {
111 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
112 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
113 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
114 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
115 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
116 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
117 }
118
119 if (Subtarget.hasVector())
120 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
121 }
122
123 // Compute derived properties from the register classes
125
126 // Set up special registers.
127 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
128
129 // TODO: It may be better to default to latency-oriented scheduling; however,
130 // LLVM's current latency-oriented scheduler can't handle physreg definitions
131 // such as SystemZ has with CC, so set this to the register-pressure
132 // scheduler, because it can.
134
137
139
140 // Instructions are strings of 2-byte aligned 2-byte values.
142 // For performance reasons we prefer 16-byte alignment.
144
145 // Handle operations that are handled in a similar way for all types.
146 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
147 I <= MVT::LAST_FP_VALUETYPE;
148 ++I) {
150 if (isTypeLegal(VT)) {
151 // Lower SET_CC into an IPM-based sequence.
155
156 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
158
159 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
162 }
163 }
164
165 // Expand jump table branches as address arithmetic followed by an
166 // indirect jump.
168
169 // Expand BRCOND into a BR_CC (see above).
171
172 // Handle integer types except i128.
173 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
174 I <= MVT::LAST_INTEGER_VALUETYPE;
175 ++I) {
177 if (isTypeLegal(VT) && VT != MVT::i128) {
179
180 // Expand individual DIV and REMs into DIVREMs.
187
188 // Support addition/subtraction with overflow.
191
192 // Support addition/subtraction with carry.
195
196 // Support carry in as value rather than glue.
199
200 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
201 // available, or if the operand is constant.
203
204 // Use POPCNT on z196 and above.
205 if (Subtarget.hasPopulationCount())
207 else
209
210 // No special instructions for these.
213
214 // Use *MUL_LOHI where possible instead of MULH*.
219
220 // Only z196 and above have native support for conversions to unsigned.
221 // On z10, promoting to i64 doesn't generate an inexact condition for
222 // values that are outside the i32 range but in the i64 range, so use
223 // the default expansion.
224 if (!Subtarget.hasFPExtension())
226
227 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
228 // default to Expand, so need to be modified to Legal where appropriate.
230 if (Subtarget.hasFPExtension())
232
233 // And similarly for STRICT_[SU]INT_TO_FP.
235 if (Subtarget.hasFPExtension())
237 }
238 }
239
240 // Handle i128 if legal.
241 if (isTypeLegal(MVT::i128)) {
242 // No special instructions for these.
258
259 // Support addition/subtraction with carry.
264
265 // Use VPOPCT and add up partial results.
267
268 // We have to use libcalls for these.
277 }
278
279 // Type legalization will convert 8- and 16-bit atomic operations into
280 // forms that operate on i32s (but still keeping the original memory VT).
281 // Lower them into full i32 operations.
293
294 // Whether or not i128 is a legal type, we need to custom lower
295 // the atomic operations in order to exploit SystemZ instructions.
300
301 // Mark sign/zero extending atomic loads as legal, which will make
302 // DAGCombiner fold extensions into atomic loads if possible.
304 {MVT::i8, MVT::i16, MVT::i32}, Legal);
306 {MVT::i8, MVT::i16}, Legal);
308 MVT::i8, Legal);
309
310 // We can use the CC result of compare-and-swap to implement
311 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
315
317
318 // Traps are legal, as we will convert them to "j .+2".
319 setOperationAction(ISD::TRAP, MVT::Other, Legal);
320
321 // z10 has instructions for signed but not unsigned FP conversion.
322 // Handle unsigned 32-bit types as signed 64-bit types.
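 // For example, on z10 a u32 -> f64 conversion is done by zero-extending
 // the operand to i64 and using the signed 64-bit conversion (CDGBR),
 // which is safe because every zero-extended 32-bit value lies within
 // the signed 64-bit range.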
323 if (!Subtarget.hasFPExtension()) {
328 }
329
330 // We have native support for a 64-bit CTLZ, via FLOGR.
334
335 // On z15 we have native support for a 64-bit CTPOP.
336 if (Subtarget.hasMiscellaneousExtensions3()) {
339 }
340
341 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
343
344 // Expand 128 bit shifts without using a libcall.
348
349 // Also expand 256 bit shifts if i128 is a legal type.
350 if (isTypeLegal(MVT::i128)) {
354 }
355
356 // Handle bitcast from fp128 to i128.
357 if (!isTypeLegal(MVT::i128))
359
360 // We have native instructions for i8, i16 and i32 extensions, but not i1.
362 for (MVT VT : MVT::integer_valuetypes()) {
366 }
367
368 // Handle the various types of symbolic address.
374
375 // We need to handle dynamic allocations specially because of the
376 // 160-byte area at the bottom of the stack.
379
382
383 // Handle prefetches with PFD or PFDRL.
385
386 // Handle readcyclecounter with STCKF.
388
389  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
390    // Assume by default that all vector operations need to be expanded.
391 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
392 if (getOperationAction(Opcode, VT) == Legal)
393 setOperationAction(Opcode, VT, Expand);
394
395 // Likewise all truncating stores and extending loads.
396 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
397 setTruncStoreAction(VT, InnerVT, Expand);
400 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
401 }
402
403 if (isTypeLegal(VT)) {
404 // These operations are legal for anything that can be stored in a
405 // vector register, even if there is no native support for the format
406 // as such. In particular, we can do these for v4f32 even though there
407 // are no specific instructions for that format.
413
414 // Likewise, except that we need to replace the nodes with something
415 // more specific.
418 }
419 }
420
421 // Handle integer vector types.
422  for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
423    if (isTypeLegal(VT)) {
424 // These operations have direct equivalents.
429 if (VT != MVT::v2i64)
435 if (Subtarget.hasVectorEnhancements1())
437 else
441
442 // Convert a GPR scalar to a vector by inserting it into element 0.
444
445 // Use a series of unpacks for extensions.
448
449 // Detect shifts/rotates by a scalar amount and convert them into
450 // V*_BY_SCALAR.
455
456 // Add ISD::VECREDUCE_ADD as custom in order to implement
457 // it with VZERO+VSUM
459
460 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
461 // and inverting the result as necessary.
463 }
464 }
465
466 if (Subtarget.hasVector()) {
467 // There should be no need to check for float types other than v2f64
468 // since <2 x f32> isn't a legal type.
477
486 }
487
488 if (Subtarget.hasVectorEnhancements2()) {
497
506 }
507
508 // Handle floating-point types.
509 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
510 I <= MVT::LAST_FP_VALUETYPE;
511 ++I) {
513 if (isTypeLegal(VT)) {
514 // We can use FI for FRINT.
516
517 // We can use the extended form of FI for other rounding operations.
518 if (Subtarget.hasFPExtension()) {
524 }
525
526 // No special instructions for these.
532
533 // Special treatment.
535
536 // Handle constrained floating-point operations.
546 if (Subtarget.hasFPExtension()) {
552 }
553 }
554 }
555
556 // Handle floating-point vector types.
557 if (Subtarget.hasVector()) {
558 // Scalar-to-vector conversion is just a subreg.
561
562 // Some insertions and extractions can be done directly but others
563 // need to go via integers.
568
569 // These operations have direct equivalents.
570 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
571 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
572 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
573 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
574 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
575 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
576 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
577 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
578 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
581 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
584
585 // Handle constrained floating-point operations.
598
603 if (Subtarget.hasVectorEnhancements1()) {
606 }
607 }
608
609 // The vector enhancements facility 1 has instructions for these.
610 if (Subtarget.hasVectorEnhancements1()) {
611 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
612 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
613 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
614 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
615 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
616 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
617 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
618 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
619 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
622 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
625
630
635
640
645
650
651 // Handle constrained floating-point operations.
664 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
665 MVT::v4f32, MVT::v2f64 }) {
670 }
671 }
672
673 // We only have fused f128 multiply-addition on vector registers.
674 if (!Subtarget.hasVectorEnhancements1()) {
677 }
678
679 // We don't have a copysign instruction on vector registers.
680 if (Subtarget.hasVectorEnhancements1())
682
683 // Needed so that we don't try to implement f128 constant loads using
684 // a load-and-extend of an f80 constant (in cases where the constant
685 // would fit in an f80).
686 for (MVT VT : MVT::fp_valuetypes())
687 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
688
689 // We don't have extending load instructions on vector registers.
690 if (Subtarget.hasVectorEnhancements1()) {
691 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
692 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
693 }
694
695 // Floating-point truncation and stores need to be done separately.
696 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
697 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
698 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
699
700 // We have 64-bit FPR<->GPR moves, but need special handling for
701 // 32-bit forms.
702 if (!Subtarget.hasVector()) {
705 }
706
707 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
708 // structure, but VAEND is a no-op.
712
714
715 // Codes for which we want to perform some z-specific combinations.
719 ISD::LOAD,
730 ISD::SDIV,
731 ISD::UDIV,
732 ISD::SREM,
733 ISD::UREM,
736
737 // Handle intrinsics.
740
741 // We want to use MVC in preference to even a single load/store pair.
742 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
744
745 // The main memset sequence is a byte store followed by an MVC.
746 // Two STC or MV..I stores win over that, but the kind of fused stores
747 // generated by target-independent code don't when the byte value is
748 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
749 // than "STC;MVC". Handle the choice in target-specific code instead.
750 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
752
753 // Default to having -disable-strictnode-mutation on
754 IsStrictFPEnabled = true;
755
756 if (Subtarget.isTargetzOS()) {
757 struct RTLibCallMapping {
758 RTLIB::Libcall Code;
759 const char *Name;
760 };
761 static RTLibCallMapping RTLibCallCommon[] = {
762#define HANDLE_LIBCALL(code, name) {RTLIB::code, name},
763#include "ZOSLibcallNames.def"
764 };
765 for (auto &E : RTLibCallCommon)
766 setLibcallName(E.Code, E.Name);
767 }
768}
769
770bool SystemZTargetLowering::useSoftFloat() const {
771  return Subtarget.hasSoftFloat();
772}
773
774EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
775                                              LLVMContext &, EVT VT) const {
776 if (!VT.isVector())
777    return MVT::i32;
778  return VT.changeVectorElementTypeToInteger();
779}
780
782 const MachineFunction &MF, EVT VT) const {
783 VT = VT.getScalarType();
784
785 if (!VT.isSimple())
786 return false;
787
788 switch (VT.getSimpleVT().SimpleTy) {
789 case MVT::f32:
790 case MVT::f64:
791 return true;
792 case MVT::f128:
793 return Subtarget.hasVectorEnhancements1();
794 default:
795 break;
796 }
797
798 return false;
799}
800
801// Return true if the constant can be generated with a vector instruction,
802// such as VGBM, VGM or VREPI.
804 const SystemZSubtarget &Subtarget) {
805 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
806 if (!Subtarget.hasVector() ||
807 (isFP128 && !Subtarget.hasVectorEnhancements1()))
808 return false;
809
810 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
811 // preferred way of creating all-zero and all-one vectors so give it
812 // priority over other methods below.
813 unsigned Mask = 0;
814 unsigned I = 0;
815 for (; I < SystemZ::VectorBytes; ++I) {
816 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
817 if (Byte == 0xff)
818 Mask |= 1ULL << I;
819 else if (Byte != 0)
820 break;
821 }
822 if (I == SystemZ::VectorBytes) {
824 OpVals.push_back(Mask);
826 return true;
827 }
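  // For example, an i128 constant whose sixteen bytes are each 0x00 or 0xff
  // is matched above and encoded as a 16-bit byte mask; any other byte value
  // (say 0x01) stops the scan and control falls through to the element-splat
  // checks below.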
828
829 if (SplatBitSize > 64)
830 return false;
831
832 auto tryValue = [&](uint64_t Value) -> bool {
833 // Try VECTOR REPLICATE IMMEDIATE
834 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
835 if (isInt<16>(SignedValue)) {
836 OpVals.push_back(((unsigned) SignedValue));
839 SystemZ::VectorBits / SplatBitSize);
840 return true;
841 }
842 // Try VECTOR GENERATE MASK
843 unsigned Start, End;
844 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
845 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
846 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
847 // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
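      // For example, with SplatBitSize == 32 a splat element of 0x0000ffff
      // has its set bits at 64-bit positions 48..63, so Start/End come back
      // as 48/63 and are rebased here to 16/31 for the 32-bit element.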
848 OpVals.push_back(Start - (64 - SplatBitSize));
849 OpVals.push_back(End - (64 - SplatBitSize));
852 SystemZ::VectorBits / SplatBitSize);
853 return true;
854 }
855 return false;
856 };
857
858 // First try assuming that any undefined bits above the highest set bit
859 // and below the lowest set bit are 1s. This increases the likelihood of
860 // being able to use a sign-extended element value in VECTOR REPLICATE
861 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
862 uint64_t SplatBitsZ = SplatBits.getZExtValue();
863 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
864 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
865 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
866 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
867 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
868 if (tryValue(SplatBitsZ | Upper | Lower))
869 return true;
870
871 // Now try assuming that any undefined bits between the first and
872 // last defined set bits are set. This increases the chances of
873 // using a non-wraparound mask.
874 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
875 return tryValue(SplatBitsZ | Middle);
876}
877
879 if (IntImm.isSingleWord()) {
880 IntBits = APInt(128, IntImm.getZExtValue());
881 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
882 } else
883 IntBits = IntImm;
884 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
885
886 // Find the smallest splat.
887 SplatBits = IntImm;
888 unsigned Width = SplatBits.getBitWidth();
889 while (Width > 8) {
890 unsigned HalfSize = Width / 2;
891 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
892 APInt LowValue = SplatBits.trunc(HalfSize);
893
894 // If the two halves do not match, stop here.
895 if (HighValue != LowValue || 8 > HalfSize)
896 break;
897
898 SplatBits = HighValue;
899 Width = HalfSize;
900 }
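  // For example, the 128-bit pattern 0x00ff00ff...00ff halves down to the
  // 16-bit value 0x00ff and stops there, since its two 8-bit halves (0x00
  // and 0xff) differ; SplatBitSize then becomes 16.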
901 SplatUndef = 0;
902 SplatBitSize = Width;
903}
904
906 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
907 bool HasAnyUndefs;
908
909 // Get IntBits by finding the 128 bit splat.
910 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
911 true);
912
913 // Get SplatBits by finding the 8 bit or greater splat.
914 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
915 true);
916}
917
919 bool ForCodeSize) const {
920 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
921 if (Imm.isZero() || Imm.isNegZero())
922 return true;
923
925}
926
927/// Returns true if stack probing through inline assembly is requested.
929 // If the function specifically requests inline stack probes, emit them.
930 if (MF.getFunction().hasFnAttribute("probe-stack"))
931 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
932 "inline-asm";
933 return false;
934}
935
939}
940
944}
945
948 // Don't expand subword operations as they require special treatment.
949 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
951
952 // Don't expand if there is a target instruction available.
953 if (Subtarget.hasInterlockedAccess1() &&
954 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
961
963}
964
965bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
966  // We can use CGFI or CLGFI.
967 return isInt<32>(Imm) || isUInt<32>(Imm);
968}
969
970bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
971  // We can use ALGFI or SLGFI.
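  // For example, an immediate of -0xffffffff is accepted: the value itself
  // fits neither the signed nor the unsigned 32-bit field, but its negation
  // does, so the addition can be done with a logical subtract (SLGFI).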
972 return isUInt<32>(Imm) || isUInt<32>(-Imm);
973}
974
976 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
977 // Unaligned accesses should never be slower than the expanded version.
978 // We check specifically for aligned accesses in the few cases where
979 // they are required.
980 if (Fast)
981 *Fast = 1;
982 return true;
983}
984
985// Information about the addressing mode for a memory access.
987 // True if a long displacement is supported.
989
990 // True if use of index register is supported.
992
993 AddressingMode(bool LongDispl, bool IdxReg) :
994 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
995};
996
997// Return the desired addressing mode for a Load whose only use (in the
998// same block) is a Store.
1000 Type *Ty) {
1001 // With vector support a Load->Store combination may be combined to either
1002 // an MVC or vector operations and it seems to work best to allow the
1003 // vector addressing mode.
1004 if (HasVector)
1005 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1006
1007 // Otherwise only the MVC case is special.
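  // MVC is an SS-format instruction with a single base register and an
  // unsigned 12-bit displacement, so neither a long displacement nor an
  // index register helps in that case.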
1008 bool MVC = Ty->isIntegerTy(8);
1009 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1010}
1011
1012// Return the addressing mode which seems most desirable given an LLVM
1013// Instruction pointer.
1014static AddressingMode
1016 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1017 switch (II->getIntrinsicID()) {
1018 default: break;
1019 case Intrinsic::memset:
1020 case Intrinsic::memmove:
1021 case Intrinsic::memcpy:
1022 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1023 }
1024 }
1025
1026 if (isa<LoadInst>(I) && I->hasOneUse()) {
1027 auto *SingleUser = cast<Instruction>(*I->user_begin());
1028 if (SingleUser->getParent() == I->getParent()) {
1029 if (isa<ICmpInst>(SingleUser)) {
1030 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1031 if (C->getBitWidth() <= 64 &&
1032 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1033 // Comparison of memory with 16 bit signed / unsigned immediate
1034 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1035 } else if (isa<StoreInst>(SingleUser))
1036 // Load->Store
1037 return getLoadStoreAddrMode(HasVector, I->getType());
1038 }
1039 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1040 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1041 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1042 // Load->Store
1043 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1044 }
1045
1046 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1047
1048 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1049 // dependencies (LDE only supports small offsets).
1050 // * Utilize the vector registers to hold floating point
1051 // values (vector load / store instructions only support small
1052 // offsets).
1053
1054 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1055 I->getOperand(0)->getType());
1056 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1057 bool IsVectorAccess = MemAccessTy->isVectorTy();
1058
1059 // A store of an extracted vector element will be combined into a VSTE type
1060 // instruction.
1061 if (!IsVectorAccess && isa<StoreInst>(I)) {
1062 Value *DataOp = I->getOperand(0);
1063 if (isa<ExtractElementInst>(DataOp))
1064 IsVectorAccess = true;
1065 }
1066
1067 // A load which gets inserted into a vector element will be combined into a
1068 // VLE type instruction.
1069 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1070 User *LoadUser = *I->user_begin();
1071 if (isa<InsertElementInst>(LoadUser))
1072 IsVectorAccess = true;
1073 }
1074
1075 if (IsFPAccess || IsVectorAccess)
1076 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1077 }
1078
1079 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1080}
1081
1083 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1084 // Punt on globals for now, although they can be used in limited
1085 // RELATIVE LONG cases.
1086 if (AM.BaseGV)
1087 return false;
1088
1089 // Require a 20-bit signed offset.
1090 if (!isInt<20>(AM.BaseOffs))
1091 return false;
1092
1093 bool RequireD12 =
1094 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1095 AddressingMode SupportedAM(!RequireD12, true);
1096 if (I != nullptr)
1097 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1098
1099 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1100 return false;
1101
1102 if (!SupportedAM.IndexReg)
1103 // No indexing allowed.
1104 return AM.Scale == 0;
1105 else
1106 // Indexing is OK but no scale factor can be applied.
1107 return AM.Scale == 0 || AM.Scale == 1;
1108}
1109
1111 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
1112 unsigned SrcAS, const AttributeList &FuncAttributes) const {
1113 const int MVCFastLen = 16;
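  // A single MVC can copy up to 256 bytes; MVCFastLen only bounds the sizes
  // for which the generic expansion into separate loads/stores is suppressed
  // in favor of MVC/XC below.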
1114
1115 if (Limit != ~unsigned(0)) {
1116 // Don't expand Op into scalar loads/stores in these cases:
1117 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1118 return false; // Small memcpy: Use MVC
1119 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1120 return false; // Small memset (first byte with STC/MVI): Use MVC
1121 if (Op.isZeroMemset())
1122 return false; // Memset zero: Use XC
1123 }
1124
1125 return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
1126 SrcAS, FuncAttributes);
1127}
1128
1130 const AttributeList &FuncAttributes) const {
1131 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1132}
1133
1134bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1135 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1136 return false;
1137 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1138 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1139 return FromBits > ToBits;
1140}
1141
1143 if (!FromVT.isInteger() || !ToVT.isInteger())
1144 return false;
1145 unsigned FromBits = FromVT.getFixedSizeInBits();
1146 unsigned ToBits = ToVT.getFixedSizeInBits();
1147 return FromBits > ToBits;
1148}
1149
1150//===----------------------------------------------------------------------===//
1151// Inline asm support
1152//===----------------------------------------------------------------------===//
1153
1156 if (Constraint.size() == 1) {
1157 switch (Constraint[0]) {
1158 case 'a': // Address register
1159 case 'd': // Data register (equivalent to 'r')
1160 case 'f': // Floating-point register
1161 case 'h': // High-part register
1162 case 'r': // General-purpose register
1163 case 'v': // Vector register
1164 return C_RegisterClass;
1165
1166 case 'Q': // Memory with base and unsigned 12-bit displacement
1167 case 'R': // Likewise, plus an index
1168 case 'S': // Memory with base and signed 20-bit displacement
1169 case 'T': // Likewise, plus an index
1170 case 'm': // Equivalent to 'T'.
1171 return C_Memory;
1172
1173 case 'I': // Unsigned 8-bit constant
1174 case 'J': // Unsigned 12-bit constant
1175 case 'K': // Signed 16-bit constant
1176 case 'L': // Signed 20-bit displacement (on all targets we support)
1177 case 'M': // 0x7fffffff
1178 return C_Immediate;
1179
1180 default:
1181 break;
1182 }
1183 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1184 switch (Constraint[1]) {
1185 case 'Q': // Address with base and unsigned 12-bit displacement
1186 case 'R': // Likewise, plus an index
1187 case 'S': // Address with base and signed 20-bit displacement
1188 case 'T': // Likewise, plus an index
1189 return C_Address;
1190
1191 default:
1192 break;
1193 }
1194 }
1195 return TargetLowering::getConstraintType(Constraint);
1196}
1197
1200 const char *constraint) const {
1201  TargetLowering::ConstraintWeight weight = CW_Invalid;
1202  Value *CallOperandVal = info.CallOperandVal;
1203 // If we don't have a value, we can't do a match,
1204 // but allow it at the lowest weight.
1205 if (!CallOperandVal)
1206 return CW_Default;
1207 Type *type = CallOperandVal->getType();
1208 // Look at the constraint type.
1209 switch (*constraint) {
1210 default:
1211    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
1212    break;
1213
1214 case 'a': // Address register
1215 case 'd': // Data register (equivalent to 'r')
1216 case 'h': // High-part register
1217 case 'r': // General-purpose register
1218 weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1219 break;
1220
1221 case 'f': // Floating-point register
1222 if (!useSoftFloat())
1223 weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1224 break;
1225
1226 case 'v': // Vector register
1227 if (Subtarget.hasVector())
1228 weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1229 : CW_Default;
1230 break;
1231
1232 case 'I': // Unsigned 8-bit constant
1233 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1234 if (isUInt<8>(C->getZExtValue()))
1235 weight = CW_Constant;
1236 break;
1237
1238 case 'J': // Unsigned 12-bit constant
1239 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1240 if (isUInt<12>(C->getZExtValue()))
1241 weight = CW_Constant;
1242 break;
1243
1244 case 'K': // Signed 16-bit constant
1245 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1246 if (isInt<16>(C->getSExtValue()))
1247 weight = CW_Constant;
1248 break;
1249
1250 case 'L': // Signed 20-bit displacement (on all targets we support)
1251 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1252 if (isInt<20>(C->getSExtValue()))
1253 weight = CW_Constant;
1254 break;
1255
1256 case 'M': // 0x7fffffff
1257 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1258 if (C->getZExtValue() == 0x7fffffff)
1259 weight = CW_Constant;
1260 break;
1261 }
1262 return weight;
1263}
1264
1265// Parse a "{tNNN}" register constraint for which the register type "t"
1266// has already been verified. RC is the class associated with "t" and
1267// Map maps 0-based register numbers to LLVM register numbers.
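// For example, the constraint "{r5}" parses to index 5, which the 64-bit
// GPR map resolves to SystemZ::R5D.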
1268static std::pair<unsigned, const TargetRegisterClass *>
1270 const unsigned *Map, unsigned Size) {
1271 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1272 if (isdigit(Constraint[2])) {
1273 unsigned Index;
1274 bool Failed =
1275 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1276 if (!Failed && Index < Size && Map[Index])
1277 return std::make_pair(Map[Index], RC);
1278 }
1279 return std::make_pair(0U, nullptr);
1280}
1281
1282std::pair<unsigned, const TargetRegisterClass *>
1284 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1285 if (Constraint.size() == 1) {
1286 // GCC Constraint Letters
1287 switch (Constraint[0]) {
1288 default: break;
1289 case 'd': // Data register (equivalent to 'r')
1290 case 'r': // General-purpose register
1291 if (VT.getSizeInBits() == 64)
1292 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1293 else if (VT.getSizeInBits() == 128)
1294 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1295 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1296
1297 case 'a': // Address register
1298 if (VT == MVT::i64)
1299 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1300 else if (VT == MVT::i128)
1301 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1302 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1303
1304 case 'h': // High-part register (an LLVM extension)
1305 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1306
1307 case 'f': // Floating-point register
1308 if (!useSoftFloat()) {
1309 if (VT.getSizeInBits() == 64)
1310 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1311 else if (VT.getSizeInBits() == 128)
1312 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1313 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1314 }
1315 break;
1316
1317 case 'v': // Vector register
1318 if (Subtarget.hasVector()) {
1319 if (VT.getSizeInBits() == 32)
1320 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1321 if (VT.getSizeInBits() == 64)
1322 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1323 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1324 }
1325 break;
1326 }
1327 }
1328 if (Constraint.starts_with("{")) {
1329
1330 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1331 // to check the size on.
1332 auto getVTSizeInBits = [&VT]() {
1333 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1334 };
1335
1336 // We need to override the default register parsing for GPRs and FPRs
1337 // because the interpretation depends on VT. The internal names of
1338 // the registers are also different from the external names
1339 // (F0D and F0S instead of F0, etc.).
1340 if (Constraint[1] == 'r') {
1341 if (getVTSizeInBits() == 32)
1342 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1344 if (getVTSizeInBits() == 128)
1345 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1347 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1349 }
1350 if (Constraint[1] == 'f') {
1351 if (useSoftFloat())
1352 return std::make_pair(
1353 0u, static_cast<const TargetRegisterClass *>(nullptr));
1354 if (getVTSizeInBits() == 32)
1355 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1357 if (getVTSizeInBits() == 128)
1358 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1360 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1362 }
1363 if (Constraint[1] == 'v') {
1364 if (!Subtarget.hasVector())
1365 return std::make_pair(
1366 0u, static_cast<const TargetRegisterClass *>(nullptr));
1367 if (getVTSizeInBits() == 32)
1368 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1370 if (getVTSizeInBits() == 64)
1371 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1373 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1375 }
1376 }
1377 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1378}
1379
1380// FIXME? Maybe this could be a TableGen attribute on some registers and
1381// this table could be generated automatically from RegInfo.
1384 const MachineFunction &MF) const {
1385 Register Reg =
1387 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0)
1388 .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0)
1389 .Default(0);
1390
1391 if (Reg)
1392 return Reg;
1393 report_fatal_error("Invalid register name global variable");
1394}
1395
1397 const Constant *PersonalityFn) const {
1398 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1399}
1400
1402 const Constant *PersonalityFn) const {
1403 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1404}
1405
1407 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1408 SelectionDAG &DAG) const {
1409 // Only support length 1 constraints for now.
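  // For example, asm("ahi %0,%1" : "+d"(X) : "K"(42)) reaches this hook with
  // constraint 'K'; 42 fits in a signed 16-bit field and is forwarded as a
  // target constant, while out-of-range values simply fall through to the
  // generic handling.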
1410 if (Constraint.size() == 1) {
1411 switch (Constraint[0]) {
1412 case 'I': // Unsigned 8-bit constant
1413 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1414 if (isUInt<8>(C->getZExtValue()))
1415 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1416 Op.getValueType()));
1417 return;
1418
1419 case 'J': // Unsigned 12-bit constant
1420 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1421 if (isUInt<12>(C->getZExtValue()))
1422 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1423 Op.getValueType()));
1424 return;
1425
1426 case 'K': // Signed 16-bit constant
1427 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1428 if (isInt<16>(C->getSExtValue()))
1429 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1430 Op.getValueType()));
1431 return;
1432
1433 case 'L': // Signed 20-bit displacement (on all targets we support)
1434 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1435 if (isInt<20>(C->getSExtValue()))
1436 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1437 Op.getValueType()));
1438 return;
1439
1440 case 'M': // 0x7fffffff
1441 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1442 if (C->getZExtValue() == 0x7fffffff)
1443 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1444 Op.getValueType()));
1445 return;
1446 }
1447 }
1448 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1449}
1450
1451//===----------------------------------------------------------------------===//
1452// Calling conventions
1453//===----------------------------------------------------------------------===//
1454
1455#include "SystemZGenCallingConv.inc"
1456
1458 CallingConv::ID) const {
1459 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1460 SystemZ::R14D, 0 };
1461 return ScratchRegs;
1462}
1463
1465 Type *ToType) const {
1466 return isTruncateFree(FromType, ToType);
1467}
1468
1470 return CI->isTailCall();
1471}
1472
1473// Value is a value that has been passed to us in the location described by VA
1474// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1475// any loads onto Chain.
1477 CCValAssign &VA, SDValue Chain,
1478 SDValue Value) {
1479 // If the argument has been promoted from a smaller type, insert an
1480 // assertion to capture this.
1481 if (VA.getLocInfo() == CCValAssign::SExt)
1483 DAG.getValueType(VA.getValVT()));
1484 else if (VA.getLocInfo() == CCValAssign::ZExt)
1486 DAG.getValueType(VA.getValVT()));
1487
1488 if (VA.isExtInLoc())
1489 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1490 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1491 // If this is a short vector argument loaded from the stack,
1492 // extend from i64 to full vector size and then bitcast.
1493 assert(VA.getLocVT() == MVT::i64);
1494 assert(VA.getValVT().isVector());
1495 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1496 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1497 } else
1498 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1499 return Value;
1500}
1501
1502// Value is a value of type VA.getValVT() that we need to copy into
1503// the location described by VA. Return a copy of Value converted to
1504// VA.getLocVT(). The caller is responsible for handling indirect values.
1506 CCValAssign &VA, SDValue Value) {
1507 switch (VA.getLocInfo()) {
1508 case CCValAssign::SExt:
1509 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1510 case CCValAssign::ZExt:
1511 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1512 case CCValAssign::AExt:
1513 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1514 case CCValAssign::BCvt: {
1515 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1516 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1517 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1518 // For an f32 vararg we need to first promote it to an f64 and then
1519 // bitcast it to an i64.
1520 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1521 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1522 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1523 ? MVT::v2i64
1524 : VA.getLocVT();
1525 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1526 // For ELF, this is a short vector argument to be stored to the stack,
1527 // bitcast to v2i64 and then extract first element.
1528 if (BitCastToType == MVT::v2i64)
1529 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1530 DAG.getConstant(0, DL, MVT::i32));
1531 return Value;
1532 }
1533 case CCValAssign::Full:
1534 return Value;
1535 default:
1536 llvm_unreachable("Unhandled getLocInfo()");
1537 }
1538}
1539
1541 SDLoc DL(In);
1542 SDValue Lo, Hi;
1543 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1544 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1545 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1546 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1547 DAG.getConstant(64, DL, MVT::i32)));
1548 } else {
1549 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1550 }
1551
1552 // FIXME: If v2i64 were a legal type, we could use it instead of
1553 // Untyped here. This might enable improved folding.
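  // PAIR128 combines Hi and Lo into an even/odd GR64 pair, i.e. one of the
  // 128-bit GR128 registers such as R0Q.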
1554 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1555 MVT::Untyped, Hi, Lo);
1556 return SDValue(Pair, 0);
1557}
1558
1560 SDLoc DL(In);
1561 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1562 DL, MVT::i64, In);
1563 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1564 DL, MVT::i64, In);
1565
1566 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1567 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1568 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1569 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1570 DAG.getConstant(64, DL, MVT::i32));
1571 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1572 } else {
1573 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1574 }
1575}
1576
1578 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1579 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1580 EVT ValueVT = Val.getValueType();
1581 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1582 // Inline assembly operand.
1583 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1584 return true;
1585 }
1586
1587 return false;
1588}
1589
1591 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1592 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1593 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1594 // Inline assembly operand.
1595 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1596 return DAG.getBitcast(ValueVT, Res);
1597 }
1598
1599 return SDValue();
1600}
1601
1603 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1604 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1605 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1607 MachineFrameInfo &MFI = MF.getFrameInfo();
1609 SystemZMachineFunctionInfo *FuncInfo =
1611 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1612 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1613
1614 // Assign locations to all of the incoming arguments.
1616 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1617 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1618 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
1619
1620 unsigned NumFixedGPRs = 0;
1621 unsigned NumFixedFPRs = 0;
1622 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1623 SDValue ArgValue;
1624 CCValAssign &VA = ArgLocs[I];
1625 EVT LocVT = VA.getLocVT();
1626 if (VA.isRegLoc()) {
1627 // Arguments passed in registers
1628 const TargetRegisterClass *RC;
1629 switch (LocVT.getSimpleVT().SimpleTy) {
1630 default:
1631 // Integers smaller than i64 should be promoted to i64.
1632 llvm_unreachable("Unexpected argument type");
1633 case MVT::i32:
1634 NumFixedGPRs += 1;
1635 RC = &SystemZ::GR32BitRegClass;
1636 break;
1637 case MVT::i64:
1638 NumFixedGPRs += 1;
1639 RC = &SystemZ::GR64BitRegClass;
1640 break;
1641 case MVT::f32:
1642 NumFixedFPRs += 1;
1643 RC = &SystemZ::FP32BitRegClass;
1644 break;
1645 case MVT::f64:
1646 NumFixedFPRs += 1;
1647 RC = &SystemZ::FP64BitRegClass;
1648 break;
1649 case MVT::f128:
1650 NumFixedFPRs += 2;
1651 RC = &SystemZ::FP128BitRegClass;
1652 break;
1653 case MVT::v16i8:
1654 case MVT::v8i16:
1655 case MVT::v4i32:
1656 case MVT::v2i64:
1657 case MVT::v4f32:
1658 case MVT::v2f64:
1659 RC = &SystemZ::VR128BitRegClass;
1660 break;
1661 }
1662
1663 Register VReg = MRI.createVirtualRegister(RC);
1664 MRI.addLiveIn(VA.getLocReg(), VReg);
1665 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1666 } else {
1667 assert(VA.isMemLoc() && "Argument not register or memory");
1668
1669 // Create the frame index object for this incoming parameter.
1670 // FIXME: Pre-include call frame size in the offset, should not
1671 // need to manually add it here.
1672 int64_t ArgSPOffset = VA.getLocMemOffset();
1673 if (Subtarget.isTargetXPLINK64()) {
1674 auto &XPRegs =
1676 ArgSPOffset += XPRegs.getCallFrameSize();
1677 }
1678 int FI =
1679 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1680
1681 // Create the SelectionDAG nodes corresponding to a load
1682 // from this parameter. Unpromoted ints and floats are
1683 // passed as right-justified 8-byte values.
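      // For example, an i32 or f32 stack argument occupies the
      // high-addressed four bytes of its 8-byte slot, hence the +4
      // adjustment below.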
1684 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1685 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1686 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1687 DAG.getIntPtrConstant(4, DL));
1688 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1690 }
1691
1692 // Convert the value of the argument register into the value that's
1693 // being passed.
1694 if (VA.getLocInfo() == CCValAssign::Indirect) {
1695 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1697 // If the original argument was split (e.g. i128), we need
1698 // to load all parts of it here (using the same address).
1699 unsigned ArgIndex = Ins[I].OrigArgIndex;
1700 assert (Ins[I].PartOffset == 0);
1701 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1702 CCValAssign &PartVA = ArgLocs[I + 1];
1703 unsigned PartOffset = Ins[I + 1].PartOffset;
1704 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1705 DAG.getIntPtrConstant(PartOffset, DL));
1706 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1708 ++I;
1709 }
1710 } else
1711 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1712 }
1713
1714 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
1715 // Save the number of non-varargs registers for later use by va_start, etc.
1716 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1717 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1718
1719 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1720 Subtarget.getSpecialRegisters());
1721
1722 // Likewise the address (in the form of a frame index) of where the
1723 // first stack vararg would be. The 1-byte size here is arbitrary.
1724 // FIXME: Pre-include call frame size in the offset, should not
1725 // need to manually add it here.
1726 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
1727 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
1728 FuncInfo->setVarArgsFrameIndex(FI);
1729 }
1730
1731 if (IsVarArg && Subtarget.isTargetELF()) {
1732 // Save the number of non-varargs registers for later use by va_start, etc.
1733 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1734 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1735
1736 // Likewise the address (in the form of a frame index) of where the
1737 // first stack vararg would be. The 1-byte size here is arbitrary.
1738 int64_t VarArgsOffset = CCInfo.getStackSize();
1739 FuncInfo->setVarArgsFrameIndex(
1740 MFI.CreateFixedObject(1, VarArgsOffset, true));
1741
1742 // ...and a similar frame index for the caller-allocated save area
1743 // that will be used to store the incoming registers.
1744 int64_t RegSaveOffset =
1745 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1746 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1747 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1748
1749 // Store the FPR varargs in the reserved frame slots. (We store the
1750 // GPRs as part of the prologue.)
1751 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1753 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1754 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1755 int FI =
1757 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1759 &SystemZ::FP64BitRegClass);
1760 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1761 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1763 }
1764 // Join the stores, which are independent of one another.
1765 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1766 ArrayRef(&MemOps[NumFixedFPRs],
1767 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
1768 }
1769 }
1770
1771 if (Subtarget.isTargetXPLINK64()) {
1772 // Create virtual register for handling incoming "ADA" special register (R5)
1773 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
1774 Register ADAvReg = MRI.createVirtualRegister(RC);
1775 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1776 Subtarget.getSpecialRegisters());
1777 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
1778 FuncInfo->setADAVirtualRegister(ADAvReg);
1779 }
1780 return Chain;
1781}
1782
1783static bool canUseSiblingCall(const CCState &ArgCCInfo,
1786 // Punt if there are any indirect or stack arguments, or if the call
1787 // needs the callee-saved argument register R6, or if the call uses
1788 // the callee-saved register arguments SwiftSelf and SwiftError.
1789 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1790 CCValAssign &VA = ArgLocs[I];
1792 return false;
1793 if (!VA.isRegLoc())
1794 return false;
1795 Register Reg = VA.getLocReg();
1796 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1797 return false;
1798 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1799 return false;
1800 }
1801 return true;
1802}
1803
1805 unsigned Offset, bool LoadAdr = false) {
1808 unsigned ADAvReg = MFI->getADAVirtualRegister();
1810
1811 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
1812 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
1813
1814 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
1815 if (!LoadAdr)
1816 Result = DAG.getLoad(
1817 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
1819
1820 return Result;
1821}
1822
1823// ADA access using Global value
1824// Note: for functions, address of descriptor is returned
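// The ADA (associated data area) pointer arrives in R5 under XPLINK; a
// function descriptor holds the callee's ADA at offset 0 and its entry
// point at offset 8, matching the ADADelta/EPADelta values used below.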
1826 EVT PtrVT) {
1827 unsigned ADAtype;
1828 bool LoadAddr = false;
1829 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
1830 bool IsFunction =
1831 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
1832 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
1833
1834 if (IsFunction) {
1835 if (IsInternal) {
1837 LoadAddr = true;
1838 } else
1840 } else {
1842 }
1843 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
1844
1845 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
1846}
1847
1848static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
1849 SDLoc &DL, SDValue &Chain) {
1850 unsigned ADADelta = 0; // ADA offset in desc.
1851 unsigned EPADelta = 8; // EPA offset in desc.
1854
1855 // XPLink calling convention.
1856 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1857 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
1858 G->getGlobal()->hasPrivateLinkage());
1859 if (IsInternal) {
1862 unsigned ADAvReg = MFI->getADAVirtualRegister();
1863 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
1864 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1865 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1866 return true;
1867 } else {
1869 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1870 ADA = getADAEntry(DAG, GA, DL, ADADelta);
1871 Callee = getADAEntry(DAG, GA, DL, EPADelta);
1872 }
1873 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1875 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1876 ADA = getADAEntry(DAG, ES, DL, ADADelta);
1877 Callee = getADAEntry(DAG, ES, DL, EPADelta);
1878 } else {
1879 // Function pointer case
1880 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1881 DAG.getConstant(ADADelta, DL, PtrVT));
1882 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
1884 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1885 DAG.getConstant(EPADelta, DL, PtrVT));
1886 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
1888 }
1889 return false;
1890}
1891
1892SDValue
1894 SmallVectorImpl<SDValue> &InVals) const {
1895 SelectionDAG &DAG = CLI.DAG;
1896 SDLoc &DL = CLI.DL;
1898 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1900 SDValue Chain = CLI.Chain;
1901 SDValue Callee = CLI.Callee;
1902 bool &IsTailCall = CLI.IsTailCall;
1903 CallingConv::ID CallConv = CLI.CallConv;
1904 bool IsVarArg = CLI.IsVarArg;
1906 EVT PtrVT = getPointerTy(MF.getDataLayout());
1907 LLVMContext &Ctx = *DAG.getContext();
1909
1910 // FIXME: z/OS support to be added in later.
1911 if (Subtarget.isTargetXPLINK64())
1912 IsTailCall = false;
1913
1914 // Analyze the operands of the call, assigning locations to each operand.
1916 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1917 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1918
1919 // We don't support GuaranteedTailCallOpt, only automatically-detected
1920 // sibling calls.
1921 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1922 IsTailCall = false;
1923
1924 // Get a count of how many bytes are to be pushed on the stack.
1925 unsigned NumBytes = ArgCCInfo.getStackSize();
1926
1927 // Mark the start of the call.
1928 if (!IsTailCall)
1929 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1930
1931 // Copy argument values to their designated locations.
1933 SmallVector<SDValue, 8> MemOpChains;
1934 SDValue StackPtr;
1935 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1936 CCValAssign &VA = ArgLocs[I];
1937 SDValue ArgValue = OutVals[I];
1938
1939 if (VA.getLocInfo() == CCValAssign::Indirect) {
1940 // Store the argument in a stack slot and pass its address.
1941 unsigned ArgIndex = Outs[I].OrigArgIndex;
1942 EVT SlotVT;
1943 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1944 // Allocate the full stack space for a promoted (and split) argument.
1945 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1946 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1947 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1948 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1949 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1950 } else {
1951 SlotVT = Outs[I].VT;
1952 }
1953 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1954 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1955 MemOpChains.push_back(
1956 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1958 // If the original argument was split (e.g. i128), we need
1959 // to store all parts of it here (and pass just one address).
1960 assert (Outs[I].PartOffset == 0);
1961 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1962 SDValue PartValue = OutVals[I + 1];
1963 unsigned PartOffset = Outs[I + 1].PartOffset;
1964 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1965 DAG.getIntPtrConstant(PartOffset, DL));
1966 MemOpChains.push_back(
1967 DAG.getStore(Chain, DL, PartValue, Address,
1969 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1970 SlotVT.getStoreSize()) && "Not enough space for argument part!");
1971 ++I;
1972 }
1973 ArgValue = SpillSlot;
1974 } else
1975 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1976
1977 if (VA.isRegLoc()) {
1978 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
1979 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
1980 // and low values.
1981 if (VA.getLocVT() == MVT::i128)
1982 ArgValue = lowerI128ToGR128(DAG, ArgValue);
1983 // Queue up the argument copies and emit them at the end.
1984 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1985 } else {
1986 assert(VA.isMemLoc() && "Argument not register or memory");
1987
1988 // Work out the address of the stack slot. Unpromoted ints and
1989 // floats are passed as right-justified 8-byte values.
1990 if (!StackPtr.getNode())
1991 StackPtr = DAG.getCopyFromReg(Chain, DL,
1992 Regs->getStackPointerRegister(), PtrVT);
1993 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
1994 VA.getLocMemOffset();
1995 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1996 Offset += 4;
1997 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1999
2000 // Emit the store.
2001 MemOpChains.push_back(
2002 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2003
2004 // Although long doubles or vectors are passed through the stack when
2005 // they are vararg (non-fixed arguments), if a long double or vector
2006 // occupies the third and fourth slot of the argument list, GPR3 should
2007 // still shadow the third slot of the argument list.
2008 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2009 SDValue ShadowArgValue =
2010 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2011 DAG.getIntPtrConstant(1, DL));
2012 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2013 }
2014 }
2015 }
2016
2017 // Join the stores, which are independent of one another.
2018 if (!MemOpChains.empty())
2019 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2020
2021 // Accept direct calls by converting symbolic call addresses to the
2022 // associated Target* opcodes. Force %r1 to be used for indirect
2023 // tail calls.
2024 SDValue Glue;
2025
2026 if (Subtarget.isTargetXPLINK64()) {
2027 SDValue ADA;
2028 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2029 if (!IsBRASL) {
2030 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2031 ->getAddressOfCalleeRegister();
2032 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2033 Glue = Chain.getValue(1);
2034 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2035 }
2036 RegsToPass.push_back(std::make_pair(
2037 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2038 } else {
2039 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2040 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2041 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2042 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2043 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2044 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2045 } else if (IsTailCall) {
2046 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2047 Glue = Chain.getValue(1);
2048 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2049 }
2050 }
2051
2052 // Build a sequence of copy-to-reg nodes, chained and glued together.
2053 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
2054 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
2055 RegsToPass[I].second, Glue);
2056 Glue = Chain.getValue(1);
2057 }
2058
2059 // The first call operand is the chain and the second is the target address.
2060 SmallVector<SDValue, 21> Ops;
2061 Ops.push_back(Chain);
2062 Ops.push_back(Callee);
2063
2064 // Add argument registers to the end of the list so that they are
2065 // known live into the call.
2066 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
2067 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
2068 RegsToPass[I].second.getValueType()));
2069
2070 // Add a register mask operand representing the call-preserved registers.
2071 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2072 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2073 assert(Mask && "Missing call preserved mask for calling convention");
2074 Ops.push_back(DAG.getRegisterMask(Mask));
2075
2076 // Glue the call to the argument copies, if any.
2077 if (Glue.getNode())
2078 Ops.push_back(Glue);
2079
2080 // Emit the call.
2081 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2082 if (IsTailCall) {
2083 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2084 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2085 return Ret;
2086 }
2087 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2088 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2089 Glue = Chain.getValue(1);
2090
2091 // Mark the end of the call, which is glued to the call itself.
2092 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2093 Glue = Chain.getValue(1);
2094
2095 // Assign locations to each value returned by this call.
2096 SmallVector<CCValAssign, 16> RetLocs;
2097 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2098 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2099
2100 // Copy all of the result registers out of their specified physreg.
2101 for (CCValAssign &VA : RetLocs) {
2102 // Copy the value out, gluing the copy to the end of the call sequence.
2103 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2104 VA.getLocVT(), Glue);
2105 Chain = RetValue.getValue(1);
2106 Glue = RetValue.getValue(2);
2107
2108 // Convert the value of the return register into the value that's
2109 // being returned.
2110 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2111 }
2112
2113 return Chain;
2114}
2115
2116// Generate a call taking the given operands as arguments and returning a
2117// result of type RetVT.
2118 SDValue SystemZTargetLowering::makeExternalCall(
2119 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2120 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2121 bool DoesNotReturn, bool IsReturnValueUsed) const {
2122 TargetLowering::ArgListTy Args;
2123 Args.reserve(Ops.size());
2124
2125 TargetLowering::ArgListEntry Entry;
2126 for (SDValue Op : Ops) {
2127 Entry.Node = Op;
2128 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
2129 Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2130 Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2131 Args.push_back(Entry);
2132 }
2133
2134 SDValue Callee =
2135 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2136
2137 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2138 TargetLowering::CallLoweringInfo CLI(DAG);
2139 bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
2140 CLI.setDebugLoc(DL)
2141 .setChain(Chain)
2142 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2143 .setNoReturn(DoesNotReturn)
2144 .setDiscardResult(!IsReturnValueUsed)
2145 .setSExtResult(SignExtend)
2146 .setZExtResult(!SignExtend);
2147 return LowerCallTo(CLI);
2148}
2149
2150 bool SystemZTargetLowering::
2151 CanLowerReturn(CallingConv::ID CallConv,
2152 MachineFunction &MF, bool isVarArg,
2153 const SmallVectorImpl<ISD::OutputArg> &Outs,
2154 LLVMContext &Context) const {
2155 // Special case that we cannot easily detect in RetCC_SystemZ since
2156 // i128 may not be a legal type.
2157 for (auto &Out : Outs)
2158 if (Out.ArgVT == MVT::i128)
2159 return false;
2160
2161 SmallVector<CCValAssign, 16> RetLocs;
2162 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
2163 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2164}
2165
2166 SDValue
2167 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2168 bool IsVarArg,
2169 const SmallVectorImpl<ISD::OutputArg> &Outs,
2170 const SmallVectorImpl<SDValue> &OutVals,
2171 const SDLoc &DL, SelectionDAG &DAG) const {
2172 MachineFunction &MF = DAG.getMachineFunction();
2173
2174 // Assign locations to each returned value.
2175 SmallVector<CCValAssign, 16> RetLocs;
2176 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2177 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2178
2179 // Quick exit for void returns
2180 if (RetLocs.empty())
2181 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2182
2183 if (CallConv == CallingConv::GHC)
2184 report_fatal_error("GHC functions return void only");
2185
2186 // Copy the result values into the output registers.
2187 SDValue Glue;
2188 SmallVector<SDValue, 4> RetOps;
2189 RetOps.push_back(Chain);
2190 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2191 CCValAssign &VA = RetLocs[I];
2192 SDValue RetValue = OutVals[I];
2193
2194 // Make the return register live on exit.
2195 assert(VA.isRegLoc() && "Can only return in registers!");
2196
2197 // Promote the value as required.
2198 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2199
2200 // Chain and glue the copies together.
2201 Register Reg = VA.getLocReg();
2202 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2203 Glue = Chain.getValue(1);
2204 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2205 }
2206
2207 // Update chain and glue.
2208 RetOps[0] = Chain;
2209 if (Glue.getNode())
2210 RetOps.push_back(Glue);
2211
2212 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2213}
2214
2215// Return true if Op is an intrinsic node with chain that returns the CC value
2216// as its only (other) argument. Provide the associated SystemZISD opcode and
2217// the mask of valid CC values if so.
2218static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2219 unsigned &CCValid) {
2220 unsigned Id = Op.getConstantOperandVal(1);
2221 switch (Id) {
2222 case Intrinsic::s390_tbegin:
2223 Opcode = SystemZISD::TBEGIN;
2224 CCValid = SystemZ::CCMASK_TBEGIN;
2225 return true;
2226
2227 case Intrinsic::s390_tbegin_nofloat:
2228 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2229 CCValid = SystemZ::CCMASK_TBEGIN;
2230 return true;
2231
2232 case Intrinsic::s390_tend:
2233 Opcode = SystemZISD::TEND;
2234 CCValid = SystemZ::CCMASK_TEND;
2235 return true;
2236
2237 default:
2238 return false;
2239 }
2240}
2241
2242// Return true if Op is an intrinsic node without chain that returns the
2243// CC value as its final argument. Provide the associated SystemZISD
2244// opcode and the mask of valid CC values if so.
2245static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2246 unsigned Id = Op.getConstantOperandVal(0);
2247 switch (Id) {
2248 case Intrinsic::s390_vpkshs:
2249 case Intrinsic::s390_vpksfs:
2250 case Intrinsic::s390_vpksgs:
2251 Opcode = SystemZISD::PACKS_CC;
2252 CCValid = SystemZ::CCMASK_VCMP;
2253 return true;
2254
2255 case Intrinsic::s390_vpklshs:
2256 case Intrinsic::s390_vpklsfs:
2257 case Intrinsic::s390_vpklsgs:
2258 Opcode = SystemZISD::PACKLS_CC;
2259 CCValid = SystemZ::CCMASK_VCMP;
2260 return true;
2261
2262 case Intrinsic::s390_vceqbs:
2263 case Intrinsic::s390_vceqhs:
2264 case Intrinsic::s390_vceqfs:
2265 case Intrinsic::s390_vceqgs:
2266 Opcode = SystemZISD::VICMPES;
2267 CCValid = SystemZ::CCMASK_VCMP;
2268 return true;
2269
2270 case Intrinsic::s390_vchbs:
2271 case Intrinsic::s390_vchhs:
2272 case Intrinsic::s390_vchfs:
2273 case Intrinsic::s390_vchgs:
2274 Opcode = SystemZISD::VICMPHS;
2275 CCValid = SystemZ::CCMASK_VCMP;
2276 return true;
2277
2278 case Intrinsic::s390_vchlbs:
2279 case Intrinsic::s390_vchlhs:
2280 case Intrinsic::s390_vchlfs:
2281 case Intrinsic::s390_vchlgs:
2282 Opcode = SystemZISD::VICMPHLS;
2283 CCValid = SystemZ::CCMASK_VCMP;
2284 return true;
2285
2286 case Intrinsic::s390_vtm:
2287 Opcode = SystemZISD::VTM;
2288 CCValid = SystemZ::CCMASK_VCMP;
2289 return true;
2290
2291 case Intrinsic::s390_vfaebs:
2292 case Intrinsic::s390_vfaehs:
2293 case Intrinsic::s390_vfaefs:
2294 Opcode = SystemZISD::VFAE_CC;
2295 CCValid = SystemZ::CCMASK_ANY;
2296 return true;
2297
2298 case Intrinsic::s390_vfaezbs:
2299 case Intrinsic::s390_vfaezhs:
2300 case Intrinsic::s390_vfaezfs:
2301 Opcode = SystemZISD::VFAEZ_CC;
2302 CCValid = SystemZ::CCMASK_ANY;
2303 return true;
2304
2305 case Intrinsic::s390_vfeebs:
2306 case Intrinsic::s390_vfeehs:
2307 case Intrinsic::s390_vfeefs:
2308 Opcode = SystemZISD::VFEE_CC;
2309 CCValid = SystemZ::CCMASK_ANY;
2310 return true;
2311
2312 case Intrinsic::s390_vfeezbs:
2313 case Intrinsic::s390_vfeezhs:
2314 case Intrinsic::s390_vfeezfs:
2315 Opcode = SystemZISD::VFEEZ_CC;
2316 CCValid = SystemZ::CCMASK_ANY;
2317 return true;
2318
2319 case Intrinsic::s390_vfenebs:
2320 case Intrinsic::s390_vfenehs:
2321 case Intrinsic::s390_vfenefs:
2322 Opcode = SystemZISD::VFENE_CC;
2323 CCValid = SystemZ::CCMASK_ANY;
2324 return true;
2325
2326 case Intrinsic::s390_vfenezbs:
2327 case Intrinsic::s390_vfenezhs:
2328 case Intrinsic::s390_vfenezfs:
2329 Opcode = SystemZISD::VFENEZ_CC;
2330 CCValid = SystemZ::CCMASK_ANY;
2331 return true;
2332
2333 case Intrinsic::s390_vistrbs:
2334 case Intrinsic::s390_vistrhs:
2335 case Intrinsic::s390_vistrfs:
2336 Opcode = SystemZISD::VISTR_CC;
2337 CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
2338 return true;
2339
2340 case Intrinsic::s390_vstrcbs:
2341 case Intrinsic::s390_vstrchs:
2342 case Intrinsic::s390_vstrcfs:
2343 Opcode = SystemZISD::VSTRC_CC;
2344 CCValid = SystemZ::CCMASK_ANY;
2345 return true;
2346
2347 case Intrinsic::s390_vstrczbs:
2348 case Intrinsic::s390_vstrczhs:
2349 case Intrinsic::s390_vstrczfs:
2350 Opcode = SystemZISD::VSTRCZ_CC;
2351 CCValid = SystemZ::CCMASK_ANY;
2352 return true;
2353
2354 case Intrinsic::s390_vstrsb:
2355 case Intrinsic::s390_vstrsh:
2356 case Intrinsic::s390_vstrsf:
2357 Opcode = SystemZISD::VSTRS_CC;
2358 CCValid = SystemZ::CCMASK_ANY;
2359 return true;
2360
2361 case Intrinsic::s390_vstrszb:
2362 case Intrinsic::s390_vstrszh:
2363 case Intrinsic::s390_vstrszf:
2364 Opcode = SystemZISD::VSTRSZ_CC;
2365 CCValid = SystemZ::CCMASK_ANY;
2366 return true;
2367
2368 case Intrinsic::s390_vfcedbs:
2369 case Intrinsic::s390_vfcesbs:
2370 Opcode = SystemZISD::VFCMPES;
2371 CCValid = SystemZ::CCMASK_VCMP;
2372 return true;
2373
2374 case Intrinsic::s390_vfchdbs:
2375 case Intrinsic::s390_vfchsbs:
2376 Opcode = SystemZISD::VFCMPHS;
2377 CCValid = SystemZ::CCMASK_VCMP;
2378 return true;
2379
2380 case Intrinsic::s390_vfchedbs:
2381 case Intrinsic::s390_vfchesbs:
2382 Opcode = SystemZISD::VFCMPHES;
2383 CCValid = SystemZ::CCMASK_VCMP;
2384 return true;
2385
2386 case Intrinsic::s390_vftcidb:
2387 case Intrinsic::s390_vftcisb:
2388 Opcode = SystemZISD::VFTCI;
2389 CCValid = SystemZ::CCMASK_VCMP;
2390 return true;
2391
2392 case Intrinsic::s390_tdc:
2393 Opcode = SystemZISD::TDC;
2394 CCValid = SystemZ::CCMASK_TDC;
2395 return true;
2396
2397 default:
2398 return false;
2399 }
2400}
2401
2402// Emit an intrinsic with chain and an explicit CC register result.
2403 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2404 unsigned Opcode) {
2405 // Copy all operands except the intrinsic ID.
2406 unsigned NumOps = Op.getNumOperands();
2407 SmallVector<SDValue, 6> Ops;
2408 Ops.reserve(NumOps - 1);
2409 Ops.push_back(Op.getOperand(0));
2410 for (unsigned I = 2; I < NumOps; ++I)
2411 Ops.push_back(Op.getOperand(I));
2412
2413 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2414 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2415 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2416 SDValue OldChain = SDValue(Op.getNode(), 1);
2417 SDValue NewChain = SDValue(Intr.getNode(), 1);
2418 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2419 return Intr.getNode();
2420}
2421
2422// Emit an intrinsic with an explicit CC register result.
2423 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2424 unsigned Opcode) {
2425 // Copy all operands except the intrinsic ID.
2426 unsigned NumOps = Op.getNumOperands();
2427 SmallVector<SDValue, 6> Ops;
2428 Ops.reserve(NumOps - 1);
2429 for (unsigned I = 1; I < NumOps; ++I)
2430 Ops.push_back(Op.getOperand(I));
2431
2432 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2433 return Intr.getNode();
2434}
2435
2436// CC is a comparison that will be implemented using an integer or
2437// floating-point comparison. Return the condition code mask for
2438// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2439// unsigned comparisons and clear for signed ones. In the floating-point
2440// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2441 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2442 #define CONV(X) \
2443 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2444 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2445 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2446
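// For example, CONV(GT) expands to cases for ISD::SETGT and ISD::SETOGT that
// return CCMASK_CMP_GT, plus ISD::SETUGT, which also includes CCMASK_CMP_UO.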
2447 switch (CC) {
2448 default:
2449 llvm_unreachable("Invalid integer condition!");
2450
2451 CONV(EQ);
2452 CONV(NE);
2453 CONV(GT);
2454 CONV(GE);
2455 CONV(LT);
2456 CONV(LE);
2457
2458 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2459 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2460 }
2461#undef CONV
2462}
2463
2464// If C can be converted to a comparison against zero, adjust the operands
2465// as necessary.
2466static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2467 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2468 return;
2469
2470 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2471 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2472 return;
2473
2474 int64_t Value = ConstOp1->getSExtValue();
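// Flipping the equality bit below turns e.g. "x > -1" into "x >= 0" and
// "x < 1" into "x <= 0", so the comparison can be done against zero.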
2475 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2476 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2477 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2478 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2479 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2480 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2481 }
2482}
2483
2484// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2485// adjust the operands as necessary.
2486static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2487 Comparison &C) {
2488 // For us to make any changes, it must be a comparison between a single-use
2489 // load and a constant.
2490 if (!C.Op0.hasOneUse() ||
2491 C.Op0.getOpcode() != ISD::LOAD ||
2492 C.Op1.getOpcode() != ISD::Constant)
2493 return;
2494
2495 // We must have an 8- or 16-bit load.
2496 auto *Load = cast<LoadSDNode>(C.Op0);
2497 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2498 if ((NumBits != 8 && NumBits != 16) ||
2499 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2500 return;
2501
2502 // The load must be an extending one and the constant must be within the
2503 // range of the unextended value.
2504 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2505 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2506 return;
2507 uint64_t Value = ConstOp1->getZExtValue();
2508 uint64_t Mask = (1 << NumBits) - 1;
2509 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2510 // Make sure that ConstOp1 is in range of C.Op0.
2511 int64_t SignedValue = ConstOp1->getSExtValue();
2512 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2513 return;
2514 if (C.ICmpType != SystemZICMP::SignedOnly) {
2515 // Unsigned comparison between two sign-extended values is equivalent
2516 // to unsigned comparison between two zero-extended values.
2517 Value &= Mask;
2518 } else if (NumBits == 8) {
2519 // Try to treat the comparison as unsigned, so that we can use CLI.
2520 // Adjust CCMask and Value as necessary.
2521 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2522 // Test whether the high bit of the byte is set.
2523 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2524 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2525 // Test whether the high bit of the byte is clear.
2526 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2527 else
2528 // No instruction exists for this combination.
2529 return;
2530 C.ICmpType = SystemZICMP::UnsignedOnly;
2531 }
2532 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2533 if (Value > Mask)
2534 return;
2535 // If the constant is in range, we can use any comparison.
2536 C.ICmpType = SystemZICMP::Any;
2537 } else
2538 return;
2539
2540 // Make sure that the first operand is an i32 of the right extension type.
2541 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2542 ISD::SEXTLOAD :
2543 ISD::ZEXTLOAD);
2544 if (C.Op0.getValueType() != MVT::i32 ||
2545 Load->getExtensionType() != ExtType) {
2546 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2547 Load->getBasePtr(), Load->getPointerInfo(),
2548 Load->getMemoryVT(), Load->getAlign(),
2549 Load->getMemOperand()->getFlags());
2550 // Update the chain uses.
2551 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2552 }
2553
2554 // Make sure that the second operand is an i32 with the right value.
2555 if (C.Op1.getValueType() != MVT::i32 ||
2556 Value != ConstOp1->getZExtValue())
2557 C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2558}
2559
2560// Return true if Op is either an unextended load, or a load suitable
2561// for integer register-memory comparisons of type ICmpType.
2562static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2563 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2564 if (Load) {
2565 // There are no instructions to compare a register with a memory byte.
2566 if (Load->getMemoryVT() == MVT::i8)
2567 return false;
2568 // Otherwise decide on extension type.
2569 switch (Load->getExtensionType()) {
2570 case ISD::NON_EXTLOAD:
2571 return true;
2572 case ISD::SEXTLOAD:
2573 return ICmpType != SystemZICMP::UnsignedOnly;
2574 case ISD::ZEXTLOAD:
2575 return ICmpType != SystemZICMP::SignedOnly;
2576 default:
2577 break;
2578 }
2579 }
2580 return false;
2581}
2582
2583// Return true if it is better to swap the operands of C.
2584static bool shouldSwapCmpOperands(const Comparison &C) {
2585 // Leave i128 and f128 comparisons alone, since they have no memory forms.
2586 if (C.Op0.getValueType() == MVT::i128)
2587 return false;
2588 if (C.Op0.getValueType() == MVT::f128)
2589 return false;
2590
2591 // Always keep a floating-point constant second, since comparisons with
2592 // zero can use LOAD TEST and comparisons with other constants make a
2593 // natural memory operand.
2594 if (isa<ConstantFPSDNode>(C.Op1))
2595 return false;
2596
2597 // Never swap comparisons with zero since there are many ways to optimize
2598 // those later.
2599 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2600 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2601 return false;
2602
2603 // Also keep natural memory operands second if the loaded value is
2604 // only used here. Several comparisons have memory forms.
2605 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2606 return false;
2607
2608 // Look for cases where C.Op0 is a single-use load and C.Op1 isn't.
2609 // In that case we generally prefer the memory to be second.
2610 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2611 // The only exceptions are when the second operand is a constant and
2612 // we can use things like CHHSI.
2613 if (!ConstOp1)
2614 return true;
2615 // The unsigned memory-immediate instructions can handle 16-bit
2616 // unsigned integers.
2617 if (C.ICmpType != SystemZICMP::SignedOnly &&
2618 isUInt<16>(ConstOp1->getZExtValue()))
2619 return false;
2620 // The signed memory-immediate instructions can handle 16-bit
2621 // signed integers.
2622 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2623 isInt<16>(ConstOp1->getSExtValue()))
2624 return false;
2625 return true;
2626 }
2627
2628 // Try to promote the use of CGFR and CLGFR.
2629 unsigned Opcode0 = C.Op0.getOpcode();
2630 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2631 return true;
2632 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2633 return true;
2634 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
2635 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2636 C.Op0.getConstantOperandVal(1) == 0xffffffff)
2637 return true;
2638
2639 return false;
2640}
2641
2642// Check whether C tests for equality between X and Y and whether X - Y
2643// or Y - X is also computed. In that case it's better to compare the
2644// result of the subtraction against zero.
2645 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2646 Comparison &C) {
2647 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2648 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2649 for (SDNode *N : C.Op0->uses()) {
2650 if (N->getOpcode() == ISD::SUB &&
2651 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2652 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2653 // Disable the nsw and nuw flags: the backend needs to handle
2654 // overflow as well during comparison elimination.
2655 SDNodeFlags Flags = N->getFlags();
2656 Flags.setNoSignedWrap(false);
2657 Flags.setNoUnsignedWrap(false);
2658 N->setFlags(Flags);
2659 C.Op0 = SDValue(N, 0);
2660 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2661 return;
2662 }
2663 }
2664 }
2665}
2666
2667// Check whether C compares a floating-point value with zero and if that
2668// floating-point value is also negated. In this case we can use the
2669// negation to set CC, so avoiding separate LOAD AND TEST and
2670// LOAD (NEGATIVE/COMPLEMENT) instructions.
2671static void adjustForFNeg(Comparison &C) {
2672 // This optimization is invalid for strict comparisons, since FNEG
2673 // does not raise any exceptions.
2674 if (C.Chain)
2675 return;
2676 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2677 if (C1 && C1->isZero()) {
2678 for (SDNode *N : C.Op0->uses()) {
2679 if (N->getOpcode() == ISD::FNEG) {
2680 C.Op0 = SDValue(N, 0);
2681 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2682 return;
2683 }
2684 }
2685 }
2686}
2687
2688// Check whether C compares (shl X, 32) with 0 and whether X is
2689// also sign-extended. In that case it is better to test the result
2690// of the sign extension using LTGFR.
2691//
2692// This case is important because InstCombine transforms a comparison
2693// with (sext (trunc X)) into a comparison with (shl X, 32).
2694static void adjustForLTGFR(Comparison &C) {
2695 // Check for a comparison between (shl X, 32) and 0.
2696 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
2697 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
2698 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2699 if (C1 && C1->getZExtValue() == 32) {
2700 SDValue ShlOp0 = C.Op0.getOperand(0);
2701 // See whether X has any SIGN_EXTEND_INREG uses.
2702 for (SDNode *N : ShlOp0->uses()) {
2703 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2704 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2705 C.Op0 = SDValue(N, 0);
2706 return;
2707 }
2708 }
2709 }
2710 }
2711}
2712
2713// If C compares the truncation of an extending load, try to compare
2714// the untruncated value instead. This exposes more opportunities to
2715// reuse CC.
2716static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2717 Comparison &C) {
2718 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2719 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2720 C.Op1.getOpcode() == ISD::Constant &&
2721 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
2722 C.Op1->getAsZExtVal() == 0) {
2723 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2724 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
2725 C.Op0.getValueSizeInBits().getFixedValue()) {
2726 unsigned Type = L->getExtensionType();
2727 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2728 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2729 C.Op0 = C.Op0.getOperand(0);
2730 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2731 }
2732 }
2733 }
2734}
2735
2736// Return true if shift operation N has an in-range constant shift value.
2737// Store it in ShiftVal if so.
2738static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2739 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2740 if (!Shift)
2741 return false;
2742
2743 uint64_t Amount = Shift->getZExtValue();
2744 if (Amount >= N.getValueSizeInBits())
2745 return false;
2746
2747 ShiftVal = Amount;
2748 return true;
2749}
2750
2751// Check whether an AND with Mask is suitable for a TEST UNDER MASK
2752// instruction and whether the CC value is descriptive enough to handle
2753// a comparison of type Opcode between the AND result and CmpVal.
2754// CCMask says which comparison result is being tested and BitSize is
2755// the number of bits in the operands. If TEST UNDER MASK can be used,
2756// return the corresponding CC mask, otherwise return 0.
2757static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2758 uint64_t Mask, uint64_t CmpVal,
2759 unsigned ICmpType) {
2760 assert(Mask != 0 && "ANDs with zero should have been removed by now");
2761
2762 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2763 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2764 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2765 return 0;
2766
2767 // Work out the masks for the lowest and highest bits.
2768 uint64_t High = uint64_t(1) << (63 - llvm::countl_zero(Mask));
2769 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
2770
2771 // Signed ordered comparisons are effectively unsigned if the sign
2772 // bit is dropped.
2773 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2774
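// For example, with Mask == 0xf0 we get High == 0x80 and Low == 0x10; an
// unsigned "(X & 0xf0) < 0x10" can only hold when every masked bit is zero,
// which the checks below map to a TEST UNDER MASK all-zeros condition.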
2775 // Check for equality comparisons with 0, or the equivalent.
2776 if (CmpVal == 0) {
2777 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2778 return SystemZ::CCMASK_TM_ALL_0;
2779 if (CCMask == SystemZ::CCMASK_CMP_NE)
2780 return SystemZ::CCMASK_TM_SOME_1;
2781 }
2782 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2783 if (CCMask == SystemZ::CCMASK_CMP_LT)
2784 return SystemZ::CCMASK_TM_ALL_0;
2785 if (CCMask == SystemZ::CCMASK_CMP_GE)
2786 return SystemZ::CCMASK_TM_SOME_1;
2787 }
2788 if (EffectivelyUnsigned && CmpVal < Low) {
2789 if (CCMask == SystemZ::CCMASK_CMP_LE)
2790 return SystemZ::CCMASK_TM_ALL_0;
2791 if (CCMask == SystemZ::CCMASK_CMP_GT)
2792 return SystemZ::CCMASK_TM_SOME_1;
2793 }
2794
2795 // Check for equality comparisons with the mask, or the equivalent.
2796 if (CmpVal == Mask) {
2797 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2798 return SystemZ::CCMASK_TM_ALL_1;
2799 if (CCMask == SystemZ::CCMASK_CMP_NE)
2800 return SystemZ::CCMASK_TM_SOME_0;
2801 }
2802 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2803 if (CCMask == SystemZ::CCMASK_CMP_GT)
2804 return SystemZ::CCMASK_TM_ALL_1;
2805 if (CCMask == SystemZ::CCMASK_CMP_LE)
2806 return SystemZ::CCMASK_TM_SOME_0;
2807 }
2808 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2809 if (CCMask == SystemZ::CCMASK_CMP_GE)
2810 return SystemZ::CCMASK_TM_ALL_1;
2811 if (CCMask == SystemZ::CCMASK_CMP_LT)
2812 return SystemZ::CCMASK_TM_SOME_0;
2813 }
2814
2815 // Check for ordered comparisons with the top bit.
2816 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2817 if (CCMask == SystemZ::CCMASK_CMP_LE)
2818 return SystemZ::CCMASK_TM_MSB_0;
2819 if (CCMask == SystemZ::CCMASK_CMP_GT)
2820 return SystemZ::CCMASK_TM_MSB_1;
2821 }
2822 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2823 if (CCMask == SystemZ::CCMASK_CMP_LT)
2824 return SystemZ::CCMASK_TM_MSB_0;
2825 if (CCMask == SystemZ::CCMASK_CMP_GE)
2826 return SystemZ::CCMASK_TM_MSB_1;
2827 }
2828
2829 // If there are just two bits, we can do equality checks for Low and High
2830 // as well.
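// (With Mask == 0x88, for instance, Low == 0x08 and High == 0x80: comparing
// the AND result for equality with 0x08 selects "mixed, leftmost bit zero",
// and with 0x80 selects "mixed, leftmost bit one".)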
2831 if (Mask == Low + High) {
2832 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2833 return SystemZ::CCMASK_TM_MIXED_MSB_0;
2834 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2835 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2836 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2837 return SystemZ::CCMASK_TM_MIXED_MSB_1;
2838 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2839 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2840 }
2841
2842 // Looks like we've exhausted our options.
2843 return 0;
2844}
2845
2846// See whether C can be implemented as a TEST UNDER MASK instruction.
2847// Update the arguments with the TM version if so.
2848 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2849 Comparison &C) {
2850 // Use VECTOR TEST UNDER MASK for i128 operations.
2851 if (C.Op0.getValueType() == MVT::i128) {
2852 // We can use VTM for EQ/NE comparisons of x & y against 0.
2853 if (C.Op0.getOpcode() == ISD::AND &&
2854 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2855 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
2856 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
2857 if (Mask && Mask->getAPIntValue() == 0) {
2858 C.Opcode = SystemZISD::VTM;
2859 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
2860 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
2861 C.CCValid = SystemZ::CCMASK_VCMP;
2862 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2863 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2864 else
2865 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2866 }
2867 }
2868 return;
2869 }
2870
2871 // Check that we have a comparison with a constant.
2872 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2873 if (!ConstOp1)
2874 return;
2875 uint64_t CmpVal = ConstOp1->getZExtValue();
2876
2877 // Check whether the nonconstant input is an AND with a constant mask.
2878 Comparison NewC(C);
2879 uint64_t MaskVal;
2880 ConstantSDNode *Mask = nullptr;
2881 if (C.Op0.getOpcode() == ISD::AND) {
2882 NewC.Op0 = C.Op0.getOperand(0);
2883 NewC.Op1 = C.Op0.getOperand(1);
2884 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2885 if (!Mask)
2886 return;
2887 MaskVal = Mask->getZExtValue();
2888 } else {
2889 // There is no instruction to compare with a 64-bit immediate
2890 // so use TMHH instead if possible. We need an unsigned ordered
2891 // comparison with an i64 immediate.
2892 if (NewC.Op0.getValueType() != MVT::i64 ||
2893 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2894 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2895 NewC.ICmpType == SystemZICMP::SignedOnly)
2896 return;
2897 // Convert LE and GT comparisons into LT and GE.
2898 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2899 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2900 if (CmpVal == uint64_t(-1))
2901 return;
2902 CmpVal += 1;
2903 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2904 }
2905 // If the low N bits of Op1 are zero then the low N bits of Op0 can
2906 // be masked off without changing the result.
2907 MaskVal = -(CmpVal & -CmpVal);
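// (-(CmpVal & -CmpVal) keeps the lowest set bit of CmpVal and every bit
// above it, e.g. CmpVal == 0x500 gives MaskVal == 0xffffffffffffff00.)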
2908 NewC.ICmpType = SystemZICMP::UnsignedOnly;
2909 }
2910 if (!MaskVal)
2911 return;
2912
2913 // Check whether the combination of mask, comparison value and comparison
2914 // type are suitable.
2915 unsigned BitSize = NewC.Op0.getValueSizeInBits();
2916 unsigned NewCCMask, ShiftVal;
2917 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2918 NewC.Op0.getOpcode() == ISD::SHL &&
2919 isSimpleShift(NewC.Op0, ShiftVal) &&
2920 (MaskVal >> ShiftVal != 0) &&
2921 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2922 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2923 MaskVal >> ShiftVal,
2924 CmpVal >> ShiftVal,
2925 SystemZICMP::Any))) {
2926 NewC.Op0 = NewC.Op0.getOperand(0);
2927 MaskVal >>= ShiftVal;
2928 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2929 NewC.Op0.getOpcode() == ISD::SRL &&
2930 isSimpleShift(NewC.Op0, ShiftVal) &&
2931 (MaskVal << ShiftVal != 0) &&
2932 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2933 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2934 MaskVal << ShiftVal,
2935 CmpVal << ShiftVal,
2936 SystemZICMP::UnsignedOnly))) {
2937 NewC.Op0 = NewC.Op0.getOperand(0);
2938 MaskVal <<= ShiftVal;
2939 } else {
2940 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2941 NewC.ICmpType);
2942 if (!NewCCMask)
2943 return;
2944 }
2945
2946 // Go ahead and make the change.
2947 C.Opcode = SystemZISD::TM;
2948 C.Op0 = NewC.Op0;
2949 if (Mask && Mask->getZExtValue() == MaskVal)
2950 C.Op1 = SDValue(Mask, 0);
2951 else
2952 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2953 C.CCValid = SystemZ::CCMASK_TM;
2954 C.CCMask = NewCCMask;
2955}
2956
2957// Implement i128 comparison in vector registers.
2958static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
2959 Comparison &C) {
2960 if (C.Opcode != SystemZISD::ICMP)
2961 return;
2962 if (C.Op0.getValueType() != MVT::i128)
2963 return;
2964
2965 // (In-)Equality comparisons can be implemented via VCEQGS.
2966 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2967 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2968 C.Opcode = SystemZISD::VICMPES;
2969 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
2970 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
2971 C.CCValid = SystemZ::CCMASK_VCMP;
2972 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2973 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2974 else
2975 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2976 return;
2977 }
2978
2979 // Normalize other comparisons to GT.
2980 bool Swap = false, Invert = false;
2981 switch (C.CCMask) {
2982 case SystemZ::CCMASK_CMP_GT: break;
2983 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
2984 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
2985 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
2986 default: llvm_unreachable("Invalid integer condition!");
2987 }
2988 if (Swap)
2989 std::swap(C.Op0, C.Op1);
2990
2991 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2992 C.Opcode = SystemZISD::UCMP128HI;
2993 else
2994 C.Opcode = SystemZISD::SCMP128HI;
2995 C.CCValid = SystemZ::CCMASK_ANY;
2996 C.CCMask = SystemZ::CCMASK_1;
2997
2998 if (Invert)
2999 C.CCMask ^= C.CCValid;
3000}
3001
3002// See whether the comparison argument contains a redundant AND
3003// and remove it if so. This sometimes happens due to the generic
3004// BRCOND expansion.
3005 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
3006 Comparison &C) {
3007 if (C.Op0.getOpcode() != ISD::AND)
3008 return;
3009 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3010 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3011 return;
3012 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3013 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3014 return;
3015
3016 C.Op0 = C.Op0.getOperand(0);
3017}
3018
3019// Return a Comparison that tests the condition-code result of intrinsic
3020// node Call against constant integer CC using comparison code Cond.
3021// Opcode is the opcode of the SystemZISD operation for the intrinsic
3022// and CCValid is the set of possible condition-code results.
3023static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3024 SDValue Call, unsigned CCValid, uint64_t CC,
3025 ISD::CondCode Cond) {
3026 Comparison C(Call, SDValue(), SDValue());
3027 C.Opcode = Opcode;
3028 C.CCValid = CCValid;
3029 if (Cond == ISD::SETEQ)
3030 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
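// (SystemZ CC masks put CC==0 in bit 3 (value 8) down to CC==3 in bit 0
// (value 1), so CC==1 here gives the mask 1 << 2 == 4.)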
3031 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3032 else if (Cond == ISD::SETNE)
3033 // ...and the inverse of that.
3034 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3035 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3036 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3037 // always true for CC>3.
3038 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3039 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3040 // ...and the inverse of that.
3041 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3042 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3043 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3044 // always true for CC>3.
3045 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3046 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3047 // ...and the inverse of that.
3048 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3049 else
3050 llvm_unreachable("Unexpected integer comparison type");
3051 C.CCMask &= CCValid;
3052 return C;
3053}
3054
3055 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3056static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3057 ISD::CondCode Cond, const SDLoc &DL,
3058 SDValue Chain = SDValue(),
3059 bool IsSignaling = false) {
3060 if (CmpOp1.getOpcode() == ISD::Constant) {
3061 assert(!Chain);
3062 unsigned Opcode, CCValid;
3063 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3064 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3065 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3066 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3067 CmpOp1->getAsZExtVal(), Cond);
3068 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3069 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3070 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3071 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3072 CmpOp1->getAsZExtVal(), Cond);
3073 }
3074 Comparison C(CmpOp0, CmpOp1, Chain);
3075 C.CCMask = CCMaskForCondCode(Cond);
3076 if (C.Op0.getValueType().isFloatingPoint()) {
3077 C.CCValid = SystemZ::CCMASK_FCMP;
3078 if (!C.Chain)
3079 C.Opcode = SystemZISD::FCMP;
3080 else if (!IsSignaling)
3081 C.Opcode = SystemZISD::STRICT_FCMP;
3082 else
3083 C.Opcode = SystemZISD::STRICT_FCMPS;
3084 adjustForFNeg(C);
3085 } else {
3086 assert(!C.Chain);
3087 C.CCValid = SystemZ::CCMASK_ICMP;
3088 C.Opcode = SystemZISD::ICMP;
3089 // Choose the type of comparison. Equality and inequality tests can
3090 // use either signed or unsigned comparisons. The choice also doesn't
3091 // matter if both sign bits are known to be clear. In those cases we
3092 // want to give the main isel code the freedom to choose whichever
3093 // form fits best.
3094 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3095 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3096 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3097 C.ICmpType = SystemZICMP::Any;
3098 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3099 C.ICmpType = SystemZICMP::UnsignedOnly;
3100 else
3101 C.ICmpType = SystemZICMP::SignedOnly;
3102 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3103 adjustForRedundantAnd(DAG, DL, C);
3104 adjustZeroCmp(DAG, DL, C);
3105 adjustSubwordCmp(DAG, DL, C);
3106 adjustForSubtraction(DAG, DL, C);
3107 adjustForLTGFR(C);
3108 adjustICmpTruncate(DAG, DL, C);
3109 }
3110
3111 if (shouldSwapCmpOperands(C)) {
3112 std::swap(C.Op0, C.Op1);
3113 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3114 }
3115
3116 adjustForTestUnderMask(DAG, DL, C);
3117 adjustICmp128(DAG, DL, C);
3118 return C;
3119}
3120
3121// Emit the comparison instruction described by C.
3122static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3123 if (!C.Op1.getNode()) {
3124 SDNode *Node;
3125 switch (C.Op0.getOpcode()) {
3126 case ISD::INTRINSIC_W_CHAIN:
3127 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3128 return SDValue(Node, 0);
3129 case ISD::INTRINSIC_WO_CHAIN:
3130 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3131 return SDValue(Node, Node->getNumValues() - 1);
3132 default:
3133 llvm_unreachable("Invalid comparison operands");
3134 }
3135 }
3136 if (C.Opcode == SystemZISD::ICMP)
3137 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3138 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3139 if (C.Opcode == SystemZISD::TM) {
3140 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3141 bool(C.CCValid & SystemZ::CCMASK_TM_MIXED_MSB_0));
3142 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3143 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3144 }
3145 if (C.Opcode == SystemZISD::VICMPES) {
3146 SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32);
3147 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3148 return SDValue(Val.getNode(), 1);
3149 }
3150 if (C.Chain) {
3151 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3152 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3153 }
3154 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3155}
3156
3157// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3158// 64 bits. Extend is the extension type to use. Store the high part
3159// in Hi and the low part in Lo.
3160static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3161 SDValue Op0, SDValue Op1, SDValue &Hi,
3162 SDValue &Lo) {
3163 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3164 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3165 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3166 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3167 DAG.getConstant(32, DL, MVT::i64));
3168 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3169 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3170}
3171
3172// Lower a binary operation that produces two VT results, one in each
3173// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3174// and Opcode performs the GR128 operation. Store the even register result
3175// in Even and the odd register result in Odd.
3176static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3177 unsigned Opcode, SDValue Op0, SDValue Op1,
3178 SDValue &Even, SDValue &Odd) {
3179 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3180 bool Is32Bit = is32Bit(VT);
3181 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3182 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3183}
3184
3185// Return an i32 value that is 1 if the CC value produced by CCReg is
3186// in the mask CCMask and 0 otherwise. CC is known to have a value
3187// in CCValid, so other values can be ignored.
3188static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3189 unsigned CCValid, unsigned CCMask) {
3190 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3191 DAG.getConstant(0, DL, MVT::i32),
3192 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3193 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3194 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3195}
3196
3197 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3198// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3199// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3200// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3201// floating-point comparisons.
3202 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
3203 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
3204 switch (CC) {
3205 case ISD::SETOEQ:
3206 case ISD::SETEQ:
3207 switch (Mode) {
3208 case CmpMode::Int: return SystemZISD::VICMPE;
3209 case CmpMode::FP: return SystemZISD::VFCMPE;
3210 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3211 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3212 }
3213 llvm_unreachable("Bad mode");
3214
3215 case ISD::SETOGE:
3216 case ISD::SETGE:
3217 switch (Mode) {
3218 case CmpMode::Int: return 0;
3219 case CmpMode::FP: return SystemZISD::VFCMPHE;
3220 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3221 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3222 }
3223 llvm_unreachable("Bad mode");
3224
3225 case ISD::SETOGT:
3226 case ISD::SETGT:
3227 switch (Mode) {
3228 case CmpMode::Int: return SystemZISD::VICMPH;
3229 case CmpMode::FP: return SystemZISD::VFCMPH;
3230 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3231 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3232 }
3233 llvm_unreachable("Bad mode");
3234
3235 case ISD::SETUGT:
3236 switch (Mode) {
3237 case CmpMode::Int: return SystemZISD::VICMPHL;
3238 case CmpMode::FP: return 0;
3239 case CmpMode::StrictFP: return 0;
3240 case CmpMode::SignalingFP: return 0;
3241 }
3242 llvm_unreachable("Bad mode");
3243
3244 default:
3245 return 0;
3246 }
3247}
3248
3249// Return the SystemZISD vector comparison operation for CC or its inverse,
3250// or 0 if neither can be done directly. Indicate in Invert whether the
3251// result is for the inverse of CC. Mode is as above.
3252 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3253 bool &Invert) {
3254 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3255 Invert = false;
3256 return Opcode;
3257 }
3258
3259 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3260 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3261 Invert = true;
3262 return Opcode;
3263 }
3264
3265 return 0;
3266}
3267
3268// Return a v2f64 that contains the extended form of elements Start and Start+1
3269// of v4f32 value Op. If Chain is nonnull, return the strict form.
3270static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3271 SDValue Op, SDValue Chain) {
3272 int Mask[] = { Start, -1, Start + 1, -1 };
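// The shuffle moves elements Start and Start+1 into the even lanes, which
// are the lanes that VEXTEND widens to f64.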
3273 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3274 if (Chain) {
3275 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3276 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3277 }
3278 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3279}
3280
3281// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3282// producing a result of type VT. If Chain is nonnull, return the strict form.
3283SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3284 const SDLoc &DL, EVT VT,
3285 SDValue CmpOp0,
3286 SDValue CmpOp1,
3287 SDValue Chain) const {
3288 // There is no hardware support for v4f32 (unless we have the vector
3289 // enhancements facility 1), so extend the vector into two v2f64s
3290 // and compare those.
3291 if (CmpOp0.getValueType() == MVT::v4f32 &&
3292 !Subtarget.hasVectorEnhancements1()) {
3293 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3294 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3295 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3296 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3297 if (Chain) {
3298 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3299 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3300 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3301 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3302 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3303 H1.getValue(1), L1.getValue(1),
3304 HRes.getValue(1), LRes.getValue(1) };
3305 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3306 SDValue Ops[2] = { Res, NewChain };
3307 return DAG.getMergeValues(Ops, DL);
3308 }
3309 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3310 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3311 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3312 }
3313 if (Chain) {
3314 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3315 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3316 }
3317 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3318}
3319
3320// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3321// an integer mask of type VT. If Chain is nonnull, we have a strict
3322// floating-point comparison. If in addition IsSignaling is true, we have
3323// a strict signaling floating-point comparison.
3324SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3325 const SDLoc &DL, EVT VT,
3326 ISD::CondCode CC,
3327 SDValue CmpOp0,
3328 SDValue CmpOp1,
3329 SDValue Chain,
3330 bool IsSignaling) const {
3331 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3332 assert (!Chain || IsFP);
3333 assert (!IsSignaling || Chain);
3334 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3335 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3336 bool Invert = false;
3337 SDValue Cmp;
3338 switch (CC) {
3339 // Handle tests for order using (or (ogt y x) (oge x y)).
3340 case ISD::SETUO:
3341 Invert = true;
3342 [[fallthrough]];
3343 case ISD::SETO: {
3344 assert(IsFP && "Unexpected integer comparison");
3345 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3346 DL, VT, CmpOp1, CmpOp0, Chain);
3347 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3348 DL, VT, CmpOp0, CmpOp1, Chain);
3349 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3350 if (Chain)
3351 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3352 LT.getValue(1), GE.getValue(1));
3353 break;
3354 }
3355
3356 // Handle <> tests using (or (ogt y x) (ogt x y)).
3357 case ISD::SETUEQ:
3358 Invert = true;
3359 [[fallthrough]];
3360 case ISD::SETONE: {
3361 assert(IsFP && "Unexpected integer comparison");
3362 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3363 DL, VT, CmpOp1, CmpOp0, Chain);
3364 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3365 DL, VT, CmpOp0, CmpOp1, Chain);
3366 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3367 if (Chain)
3368 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3369 LT.getValue(1), GT.getValue(1));
3370 break;
3371 }
3372
3373 // Otherwise a single comparison is enough. It doesn't really
3374 // matter whether we try the inversion or the swap first, since
3375 // there are no cases where both work.
3376 default:
3377 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3378 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3379 else {
3380 CC = ISD::getSetCCSwappedOperands(CC);
3381 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3382 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3383 else
3384 llvm_unreachable("Unhandled comparison");
3385 }
3386 if (Chain)
3387 Chain = Cmp.getValue(1);
3388 break;
3389 }
3390 if (Invert) {
3391 SDValue Mask =
3392 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3393 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3394 }
3395 if (Chain && Chain.getNode() != Cmp.getNode()) {
3396 SDValue Ops[2] = { Cmp, Chain };
3397 Cmp = DAG.getMergeValues(Ops, DL);
3398 }
3399 return Cmp;
3400}
3401
3402SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3403 SelectionDAG &DAG) const {
3404 SDValue CmpOp0 = Op.getOperand(0);
3405 SDValue CmpOp1 = Op.getOperand(1);
3406 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3407 SDLoc DL(Op);
3408 EVT VT = Op.getValueType();
3409 if (VT.isVector())
3410 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3411
3412 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3413 SDValue CCReg = emitCmp(DAG, DL, C);
3414 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3415}
3416
3417SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3418 SelectionDAG &DAG,
3419 bool IsSignaling) const {
3420 SDValue Chain = Op.getOperand(0);
3421 SDValue CmpOp0 = Op.getOperand(1);
3422 SDValue CmpOp1 = Op.getOperand(2);
3423 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3424 SDLoc DL(Op);
3425 EVT VT = Op.getNode()->getValueType(0);
3426 if (VT.isVector()) {
3427 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3428 Chain, IsSignaling);
3429 return Res.getValue(Op.getResNo());
3430 }
3431
3432 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3433 SDValue CCReg = emitCmp(DAG, DL, C);
3434 CCReg->setFlags(Op->getFlags());
3435 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3436 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3437 return DAG.getMergeValues(Ops, DL);
3438}
3439
3440SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3441 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3442 SDValue CmpOp0 = Op.getOperand(2);
3443 SDValue CmpOp1 = Op.getOperand(3);
3444 SDValue Dest = Op.getOperand(4);
3445 SDLoc DL(Op);
3446
3447 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3448 SDValue CCReg = emitCmp(DAG, DL, C);
3449 return DAG.getNode(
3450 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3451 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3452 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3453}
3454
3455// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3456// allowing Pos and Neg to be wider than CmpOp.
3457static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3458 return (Neg.getOpcode() == ISD::SUB &&
3459 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3460 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3461 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3462 Pos.getOperand(0) == CmpOp)));
3463}
3464
3465// Return the absolute or negative absolute of Op; IsNegative decides which.
3466 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3467 bool IsNegative) {
3468 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3469 if (IsNegative)
3470 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3471 DAG.getConstant(0, DL, Op.getValueType()), Op);
3472 return Op;
3473}
3474
3475SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3476 SelectionDAG &DAG) const {
3477 SDValue CmpOp0 = Op.getOperand(0);
3478 SDValue CmpOp1 = Op.getOperand(1);
3479 SDValue TrueOp = Op.getOperand(2);
3480 SDValue FalseOp = Op.getOperand(3);
3481 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3482 SDLoc DL(Op);
3483
3484 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3485
3486 // Check for absolute and negative-absolute selections, including those
3487 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3488 // This check supplements the one in DAGCombiner.
3489 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3490 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3491 C.Op1.getOpcode() == ISD::Constant &&
3492 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3493 C.Op1->getAsZExtVal() == 0) {
3494 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3495 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3496 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3497 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3498 }
3499
3500 SDValue CCReg = emitCmp(DAG, DL, C);
3501 SDValue Ops[] = {TrueOp, FalseOp,
3502 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3503 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3504
3505 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3506}
3507
3508SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3509 SelectionDAG &DAG) const {
3510 SDLoc DL(Node);
3511 const GlobalValue *GV = Node->getGlobal();
3512 int64_t Offset = Node->getOffset();
3513 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3514 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3515
3516 SDValue Result;
3517 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3518 if (isInt<32>(Offset)) {
3519 // Assign anchors at 1<<12 byte boundaries.
3520 uint64_t Anchor = Offset & ~uint64_t(0xfff);
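// (For example, Offset == 0x1802 gets the anchor 0x1000; the remaining
// 0x802 is halfword-aligned and is folded via PCREL_OFFSET below.)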
3521 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3522 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3523
3524 // The offset can be folded into the address if it is aligned to a
3525 // halfword.
3526 Offset -= Anchor;
3527 if (Offset != 0 && (Offset & 1) == 0) {
3528 SDValue Full =
3529 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3530 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3531 Offset = 0;
3532 }
3533 } else {
3534 // Conservatively load a constant offset greater than 32 bits into a
3535 // register below.
3536 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3537 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3538 }
3539 } else if (Subtarget.isTargetELF()) {
3540 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3541 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3542 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3543 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3544 } else if (Subtarget.isTargetzOS()) {
3545 Result = getADAEntry(DAG, GV, DL, PtrVT);
3546 } else
3547 llvm_unreachable("Unexpected Subtarget");
3548
3549 // If there was a non-zero offset that we didn't fold, create an explicit
3550 // addition for it.
3551 if (Offset != 0)
3552 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3553 DAG.getConstant(Offset, DL, PtrVT));
3554
3555 return Result;
3556}
3557
3558SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3559 SelectionDAG &DAG,
3560 unsigned Opcode,
3561 SDValue GOTOffset) const {
3562 SDLoc DL(Node);
3563 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3564 SDValue Chain = DAG.getEntryNode();
3565 SDValue Glue;
3566
3567 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3568 CallingConv::GHC)
3569 report_fatal_error("In GHC calling convention TLS is not supported");
3570
3571 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3572 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3573 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3574 Glue = Chain.getValue(1);
3575 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3576 Glue = Chain.getValue(1);
3577
3578 // The first call operand is the chain and the second is the TLS symbol.
3579 SmallVector<SDValue, 8> Ops;
3580 Ops.push_back(Chain);
3581 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3582 Node->getValueType(0),
3583 0, 0));
3584
3585 // Add argument registers to the end of the list so that they are
3586 // known live into the call.
3587 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3588 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3589
3590 // Add a register mask operand representing the call-preserved registers.
3591 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3592 const uint32_t *Mask =
3593 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3594 assert(Mask && "Missing call preserved mask for calling convention");
3595 Ops.push_back(DAG.getRegisterMask(Mask));
3596
3597 // Glue the call to the argument copies.
3598 Ops.push_back(Glue);
3599
3600 // Emit the call.
3601 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3602 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3603 Glue = Chain.getValue(1);
3604
3605 // Copy the return value from %r2.
3606 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3607}
3608
3609SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3610 SelectionDAG &DAG) const {
3611 SDValue Chain = DAG.getEntryNode();
3612 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3613
3614 // The high part of the thread pointer is in access register 0.
3615 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3616 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3617
3618 // The low part of the thread pointer is in access register 1.
3619 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3620 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3621
3622 // Merge them into a single 64-bit address.
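// (TP = (A0 << 32) | A1. The ANY_EXTEND of the high half is safe because the
// shift by 32 below discards whatever the undefined upper bits contained.)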
3623 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3624 DAG.getConstant(32, DL, PtrVT));
3625 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3626}
3627
3628SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3629 SelectionDAG &DAG) const {
3630 if (DAG.getTarget().useEmulatedTLS())
3631 return LowerToTLSEmulatedModel(Node, DAG);
3632 SDLoc DL(Node);
3633 const GlobalValue *GV = Node->getGlobal();
3634 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3635 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3636
3637 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3638 CallingConv::GHC)
3639 report_fatal_error("In GHC calling convention TLS is not supported");
3640
3641 SDValue TP = lowerThreadPointer(DL, DAG);
3642
3643 // Get the offset of GA from the thread pointer, based on the TLS model.
3644 SDValue Offset;
3645 switch (model) {
3646 case TLSModel::GeneralDynamic: {
3647 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3648 SystemZConstantPoolValue *CPV =
3649 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3650
3651 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3652 Offset = DAG.getLoad(
3653 PtrVT, DL, DAG.getEntryNode(), Offset,
3654 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3655
3656 // Call __tls_get_offset to retrieve the offset.
3657 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3658 break;
3659 }
3660
3661 case TLSModel::LocalDynamic: {
3662 // Load the GOT offset of the module ID.
3663 SystemZConstantPoolValue *CPV =
3664 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3665
3666 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3667 Offset = DAG.getLoad(
3668 PtrVT, DL, DAG.getEntryNode(), Offset,
3669 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3670
3671 // Call __tls_get_offset to retrieve the module base offset.
3672 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3673
3674 // Note: The SystemZLDCleanupPass will remove redundant computations
3675 // of the module base offset. Count total number of local-dynamic
3676 // accesses to trigger execution of that pass.
3677 SystemZMachineFunctionInfo* MFI =
3678 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3679 MFI->incNumLocalDynamicTLSAccesses();
3680
3681 // Add the per-symbol offset.
3682 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3683
3684 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3685 DTPOffset = DAG.getLoad(
3686 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3687 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3688
3689 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3690 break;
3691 }
3692
3693 case TLSModel::InitialExec: {
3694 // Load the offset from the GOT.
3695 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3696 SystemZII::MO_INDNTPOFF);
3697 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3698 Offset =
3699 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3700 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3701 break;
3702 }
3703
3704 case TLSModel::LocalExec: {
3705 // Force the offset into the constant pool and load it from there.
3706 SystemZConstantPoolValue *CPV =
3707 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3708
3709 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3710 Offset = DAG.getLoad(
3711 PtrVT, DL, DAG.getEntryNode(), Offset,
3712 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3713 break;
3714 }
3715 }
3716
3717 // Add the base and offset together.
3718 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3719}
3720
3721SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3722 SelectionDAG &DAG) const {
3723 SDLoc DL(Node);
3724 const BlockAddress *BA = Node->getBlockAddress();
3725 int64_t Offset = Node->getOffset();
3726 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3727
3728 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3729 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3730 return Result;
3731}
3732
3733SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3734 SelectionDAG &DAG) const {
3735 SDLoc DL(JT);
3736 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3737 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3738
3739 // Use LARL to load the address of the table.
3740 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3741}
3742
3743SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3744 SelectionDAG &DAG) const {
3745 SDLoc DL(CP);
3746 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3747
3748 SDValue Result;
3749 if (CP->isMachineConstantPoolEntry())
3750 Result =
3751 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3752 else
3753 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3754 CP->getOffset());
3755
3756 // Use LARL to load the address of the constant pool entry.
3757 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3758}
3759
3760SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3761 SelectionDAG &DAG) const {
3762 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
3763 MachineFunction &MF = DAG.getMachineFunction();
3764 MachineFrameInfo &MFI = MF.getFrameInfo();
3765 MFI.setFrameAddressIsTaken(true);
3766
3767 SDLoc DL(Op);
3768 unsigned Depth = Op.getConstantOperandVal(0);
3769 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3770
3771 // By definition, the frame address is the address of the back chain. (In
3772 // the case of packed stack without backchain, return the address where the
3773 // backchain would have been stored. This will either be an unused space or
3774 // contain a saved register).
3775 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3776 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3777
3778 if (Depth > 0) {
3779 // FIXME The frontend should detect this case.
3780 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3781 report_fatal_error("Unsupported stack frame traversal count");
3782
3783 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
3784 while (Depth--) {
3785 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
3786 MachinePointerInfo());
3787 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
3788 }
3789 }
3790
3791 return BackChain;
3792}
3793
3794SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3795 SelectionDAG &DAG) const {
3796 MachineFunction &MF = DAG.getMachineFunction();
3797 MachineFrameInfo &MFI = MF.getFrameInfo();
3798 MFI.setReturnAddressIsTaken(true);
3799
3800 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3801 return SDValue();
3802
3803 SDLoc DL(Op);
3804 unsigned Depth = Op.getConstantOperandVal(0);
3805 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3806
3807 if (Depth > 0) {
3808 // FIXME The frontend should detect this case.
3809 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3810 report_fatal_error("Unsupported stack frame traversal count");
3811
3812 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3813 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
3814 int Offset = TFL->getReturnAddressOffset(MF);
3815 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
3816 DAG.getConstant(Offset, DL, PtrVT));
3817 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
3818 MachinePointerInfo());
3819 }
3820
3821 // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an
3822 // implicit live-in.
3823 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
3824 Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(),
3825 &SystemZ::GR64BitRegClass);
3826 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3827}
3828
3829SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3830 SelectionDAG &DAG) const {
3831 SDLoc DL(Op);
3832 SDValue In = Op.getOperand(0);
3833 EVT InVT = In.getValueType();
3834 EVT ResVT = Op.getValueType();
3835
3836 // Convert loads directly. This is normally done by DAGCombiner,
3837 // but we need this case for bitcasts that are created during lowering
3838 // and which are then lowered themselves.
3839 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3840 if (ISD::isNormalLoad(LoadN)) {
3841 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3842 LoadN->getBasePtr(), LoadN->getMemOperand());
3843 // Update the chain uses.
3844 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3845 return NewLoad;
3846 }
3847
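// On SystemZ, a 32-bit FP value lives in the high 32 bits of a 64-bit FP
// register (subreg_h32), so i32<->f32 bitcasts go through a 64-bit value
// with the payload in the upper half.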
3848 if (InVT == MVT::i32 && ResVT == MVT::f32) {
3849 SDValue In64;
3850 if (Subtarget.hasHighWord()) {
3851 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3852 MVT::i64);
3853 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3854 MVT::i64, SDValue(U64, 0), In);
3855 } else {
3856 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3857 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3858 DAG.getConstant(32, DL, MVT::i64));
3859 }
3860 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3861 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3862 DL, MVT::f32, Out64);
3863 }
3864 if (InVT == MVT::f32 && ResVT == MVT::i32) {
3865 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3866 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3867 MVT::f64, SDValue(U64, 0), In);
3868 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3869 if (Subtarget.hasHighWord())
3870 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3871 MVT::i32, Out64);
3872 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3873 DAG.getConstant(32, DL, MVT::i64));
3874 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3875 }
3876 llvm_unreachable("Unexpected bitcast combination");
3877}
3878
3879SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3880 SelectionDAG &DAG) const {
3881
3882 if (Subtarget.isTargetXPLINK64())
3883 return lowerVASTART_XPLINK(Op, DAG);
3884 else
3885 return lowerVASTART_ELF(Op, DAG);
3886}
3887
3888SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
3889 SelectionDAG &DAG) const {
3890 MachineFunction &MF = DAG.getMachineFunction();
3891 SystemZMachineFunctionInfo *FuncInfo =
3892 MF.getInfo<SystemZMachineFunctionInfo>();
3893
3894 SDLoc DL(Op);
3895
3896 // vastart just stores the address of the VarArgsFrameIndex slot into the
3897 // memory location argument.
3898 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3899 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3900 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3901 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
3902 MachinePointerInfo(SV));
3903}
3904
3905SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
3906 SelectionDAG &DAG) const {
3907 MachineFunction &MF = DAG.getMachineFunction();
3908 SystemZMachineFunctionInfo *FuncInfo =
3909 MF.getInfo<SystemZMachineFunctionInfo>();
3910 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3911
3912 SDValue Chain = Op.getOperand(0);
3913 SDValue Addr = Op.getOperand(1);
3914 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3915 SDLoc DL(Op);
3916
3917 // The initial values of each field.
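// They mirror the four members of the s390x ELF va_list:
// __gpr, __fpr, __overflow_arg_area and __reg_save_area.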
3918 const unsigned NumFields = 4;
3919 SDValue Fields[NumFields] = {
3920 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3921 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3922 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3923 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3924 };
3925
3926 // Store each field into its respective slot.
3927 SDValue MemOps[NumFields];
3928 unsigned Offset = 0;
3929 for (unsigned I = 0; I < NumFields; ++I) {
3930 SDValue FieldAddr = Addr;
3931 if (Offset != 0)
3932 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3933 DAG.getIntPtrConstant(Offset, DL));
3934 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3935 MachinePointerInfo(SV, Offset));
3936 Offset += 8;
3937 }
3938 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3939}
3940
3941SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3942 SelectionDAG &DAG) const {
3943 SDValue Chain = Op.getOperand(0);
3944 SDValue DstPtr = Op.getOperand(1);
3945 SDValue SrcPtr = Op.getOperand(2);
3946 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3947 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3948 SDLoc DL(Op);
3949
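// The ELF va_list is the 32-byte (4 x 8) structure built in lowerVASTART_ELF,
// while the XPLINK64 va_list is a single pointer-sized slot, so only that
// many bytes need to be copied.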
3950 uint32_t Sz =
3951 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
3952 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
3953 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3954 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV),
3955 MachinePointerInfo(SrcSV));
3956}
3957
3958SDValue
3959SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
3960 SelectionDAG &DAG) const {
3961 if (Subtarget.isTargetXPLINK64())
3962 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
3963 else
3964 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
3965}
3966
3967SDValue
3968SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
3969 SelectionDAG &DAG) const {
3970 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3971 MachineFunction &MF = DAG.getMachineFunction();
3972 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3973 SDValue Chain = Op.getOperand(0);
3974 SDValue Size = Op.getOperand(1);
3975 SDValue Align = Op.getOperand(2);
3976 SDLoc DL(Op);
3977
3978 // If user has set the no alignment function attribute, ignore
3979 // alloca alignments.
3980 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
3981
3982 uint64_t StackAlign = TFI->getStackAlignment();
3983 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3984 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3985
3986 SDValue NeededSpace = Size;
3987
3988 // Add extra space for alignment if needed.
3989 EVT PtrVT = getPointerTy(MF.getDataLayout());
3990 if (ExtraAlignSpace)
3991 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
3992 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3993
3994 bool IsSigned = false;
3995 bool DoesNotReturn = false;
3996 bool IsReturnValueUsed = false;
3997 EVT VT = Op.getValueType();
3998 SDValue AllocaCall =
3999 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4000 CallingConv::C, IsSigned, DL, DoesNotReturn,
4001 IsReturnValueUsed)
4002 .first;
4003
4004 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4005 // to end of call in order to ensure it isn't broken up from the call
4006 // sequence.
4007 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4008 Register SPReg = Regs.getStackPointerRegister();
4009 Chain = AllocaCall.getValue(1);
4010 SDValue Glue = AllocaCall.getValue(2);
4011 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4012 Chain = NewSPRegNode.getValue(1);
4013
4014 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4015 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4016 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4017
4018 // Dynamically realign if needed.
4019 if (ExtraAlignSpace) {
4020 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4021 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4022 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4023 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4024 }
4025
4026 SDValue Ops[2] = {Result, Chain};
4027 return DAG.getMergeValues(Ops, DL);
4028}
4029
4030SDValue
4031SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4032 SelectionDAG &DAG) const {
4033 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4034 MachineFunction &MF = DAG.getMachineFunction();
4035 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4036 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4037
4038 SDValue Chain = Op.getOperand(0);
4039 SDValue Size = Op.getOperand(1);
4040 SDValue Align = Op.getOperand(2);
4041 SDLoc DL(Op);
4042
4043 // If user has set the no alignment function attribute, ignore
4044 // alloca alignments.
4045 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4046
4047 uint64_t StackAlign = TFI->getStackAlignment();
4048 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4049 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4050
4051 Register SPReg = getStackPointerRegisterToSaveRestore();
4052 SDValue NeededSpace = Size;
4053
4054 // Get a reference to the stack pointer.
4055 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4056
4057 // If we need a backchain, save it now.
4058 SDValue Backchain;
4059 if (StoreBackchain)
4060 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4061 MachinePointerInfo());
4062
4063 // Add extra space for alignment if needed.
4064 if (ExtraAlignSpace)
4065 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4066 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4067
4068 // Get the new stack pointer value.
4069 SDValue NewSP;
4070 if (hasInlineStackProbe(MF)) {
4071 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4072 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4073 Chain = NewSP.getValue(1);
4074 }
4075 else {
4076 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4077 // Copy the new stack pointer back.
4078 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4079 }
4080
4081 // The allocated data lives above the 160 bytes allocated for the standard
4082 // frame, plus any outgoing stack arguments. We don't know how much that
4083 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4084 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4085 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4086
4087 // Dynamically realign if needed.
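// E.g. with an 8-byte stack alignment and a requested 32-byte alignment,
// 24 extra bytes were allocated above; rounding the pointer up to the next
// 32-byte boundary keeps it inside that allocation.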
4088 if (RequiredAlign > StackAlign) {
4089 Result =
4090 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4091 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4092 Result =
4093 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4094 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4095 }
4096
4097 if (StoreBackchain)
4098 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4099 MachinePointerInfo());
4100
4101 SDValue Ops[2] = { Result, Chain };
4102 return DAG.getMergeValues(Ops, DL);
4103}
4104
4105SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4106 SDValue Op, SelectionDAG &DAG) const {
4107 SDLoc DL(Op);
4108
4109 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4110}
4111
4112SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4113 SelectionDAG &DAG) const {
4114 EVT VT = Op.getValueType();
4115 SDLoc DL(Op);
4116 SDValue Ops[2];
4117 if (is32Bit(VT))
4118 // Just do a normal 64-bit multiplication and extract the results.
4119 // We define this so that it can be used for constant division.
4120 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4121 Op.getOperand(1), Ops[1], Ops[0]);
4122 else if (Subtarget.hasMiscellaneousExtensions2())
4123 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4124 // the high result in the even register. ISD::SMUL_LOHI is defined to
4125 // return the low half first, so the results are in reverse order.
4126 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4127 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4128 else {
4129 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4130 //
4131 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4132 //
4133 // but using the fact that the upper halves are either all zeros
4134 // or all ones:
4135 //
4136 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4137 //
4138 // and grouping the right terms together since they are quicker than the
4139 // multiplication:
4140 //
4141 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
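// (lh is ll >> 63, i.e. either 0 or all ones, so (lh * rl) == -(lh & rl),
// and likewise for (ll * rh).)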
4142 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4143 SDValue LL = Op.getOperand(0);
4144 SDValue RL = Op.getOperand(1);
4145 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4146 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4147 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4148 // the high result in the even register. ISD::SMUL_LOHI is defined to
4149 // return the low half first, so the results are in reverse order.
4150 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4151 LL, RL, Ops[1], Ops[0]);
4152 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4153 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4154 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4155 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4156 }
4157 return DAG.getMergeValues(Ops, DL);
4158}
4159
4160SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4161 SelectionDAG &DAG) const {
4162 EVT VT = Op.getValueType();
4163 SDLoc DL(Op);
4164 SDValue Ops[2];
4165 if (is32Bit(VT))
4166 // Just do a normal 64-bit multiplication and extract the results.
4167 // We define this so that it can be used for constant division.
4168 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4169 Op.getOperand(1), Ops[1], Ops[0]);
4170 else
4171 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4172 // the high result in the even register. ISD::UMUL_LOHI is defined to
4173 // return the low half first, so the results are in reverse order.
4174 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4175 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4176 return DAG.getMergeValues(Ops, DL);
4177}
4178
4179SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4180 SelectionDAG &DAG) const {
4181 SDValue Op0 = Op.getOperand(0);
4182 SDValue Op1 = Op.getOperand(1);
4183 EVT VT = Op.getValueType();
4184 SDLoc DL(Op);
4185
4186 // We use DSGF for 32-bit division. This means the first operand must
4187 // always be 64-bit, and the second operand should be 32-bit whenever
4188 // that is possible, to improve performance.
4189 if (is32Bit(VT))
4190 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4191 else if (DAG.ComputeNumSignBits(Op1) > 32)
4192 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4193
4194 // DSG(F) returns the remainder in the even register and the
4195 // quotient in the odd register.
4196 SDValue Ops[2];
4197 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4198 return DAG.getMergeValues(Ops, DL);
4199}
4200
4201SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4202 SelectionDAG &DAG) const {
4203 EVT VT = Op.getValueType();
4204 SDLoc DL(Op);
4205
4206 // DL(G) returns the remainder in the even register and the
4207 // quotient in the odd register.
4208 SDValue Ops[2];
4209 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4210 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4211 return DAG.getMergeValues(Ops, DL);
4212}
4213
4214SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4215 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4216
4217 // Get the known-zero masks for each operand.
4218 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4219 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4220 DAG.computeKnownBits(Ops[1])};
4221
4222 // See if the upper 32 bits of one operand and the lower 32 bits of the
4223 // other are known zero. They are the low and high operands respectively.
4224 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4225 Known[1].Zero.getZExtValue() };
4226 unsigned High, Low;
4227 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4228 High = 1, Low = 0;
4229 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4230 High = 0, Low = 1;
4231 else
4232 return Op;
4233
4234 SDValue LowOp = Ops[Low];
4235 SDValue HighOp = Ops[High];
4236
4237 // If the high part is a constant, we're better off using IILH.
4238 if (HighOp.getOpcode() == ISD::Constant)
4239 return Op;
4240
4241 // If the low part is a constant that is outside the range of LHI,
4242 // then we're better off using IILF.
4243 if (LowOp.getOpcode() == ISD::Constant) {
4244 int64_t Value = int32_t(LowOp->getAsZExtVal());
4245 if (!isInt<16>(Value))
4246 return Op;
4247 }
4248
4249 // Check whether the high part is an AND that doesn't change the
4250 // high 32 bits and just masks out low bits. We can skip it if so.
4251 if (HighOp.getOpcode() == ISD::AND &&
4252 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4253 SDValue HighOp0 = HighOp.getOperand(0);
4254 uint64_t Mask = HighOp.getConstantOperandVal(1);
4255 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4256 HighOp = HighOp0;
4257 }
4258
4259 // Take advantage of the fact that all GR32 operations only change the
4260 // low 32 bits by truncating Low to an i32 and inserting it directly
4261 // using a subreg. The interesting cases are those where the truncation
4262 // can be folded.
4263 SDLoc DL(Op);
4264 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4265 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4266 MVT::i64, HighOp, Low32);
4267}
4268
4269// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4270SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4271 SelectionDAG &DAG) const {
4272 SDNode *N = Op.getNode();
4273 SDValue LHS = N->getOperand(0);
4274 SDValue RHS = N->getOperand(1);
4275 SDLoc DL(N);
4276
4277 if (N->getValueType(0) == MVT::i128) {
4278 unsigned BaseOp = 0;
4279 unsigned FlagOp = 0;
4280 bool IsBorrow = false;
4281 switch (Op.getOpcode()) {
4282 default: llvm_unreachable("Unknown instruction!");
4283 case ISD::UADDO:
4284 BaseOp = ISD::ADD;
4285 FlagOp = SystemZISD::VACC;
4286 break;
4287 case ISD::USUBO:
4288 BaseOp = ISD::SUB;
4289 FlagOp = SystemZISD::VSCBI;
4290 IsBorrow = true;
4291 break;
4292 }
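// VACC/VSCBI return the carry/borrow indication as an i128 value of 0 or 1.
// VSCBI follows the z/Architecture convention of producing 1 when no borrow
// occurs, so the flag is inverted below to match ISD::USUBO's semantics.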
4293 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4294 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4295 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4296 DAG.getValueType(MVT::i1));
4297 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4298 if (IsBorrow)
4299 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4300 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4301 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4302 }
4303
4304 unsigned BaseOp = 0;
4305 unsigned CCValid = 0;
4306 unsigned CCMask = 0;
4307
4308 switch (Op.getOpcode()) {
4309 default: llvm_unreachable("Unknown instruction!");
4310 case ISD::SADDO:
4311 BaseOp = SystemZISD::SADDO;
4312 CCValid = SystemZ::CCMASK_ARITH;
4313 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4314 break;
4315 case ISD::SSUBO:
4316 BaseOp = SystemZISD::SSUBO;
4317 CCValid = SystemZ::CCMASK_ARITH;
4318 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4319 break;
4320 case ISD::UADDO:
4321 BaseOp = SystemZISD::UADDO;
4322 CCValid = SystemZ::CCMASK_LOGICAL;
4323 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4324 break;
4325 case ISD::USUBO:
4326 BaseOp = SystemZISD::USUBO;
4327 CCValid = SystemZ::CCMASK_LOGICAL;
4328 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4329 break;
4330 }
4331
4332 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4333 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4334
4335 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4336 if (N->getValueType(1) == MVT::i1)
4337 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4338
4339 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4340}
4341
4342static bool isAddCarryChain(SDValue Carry) {
4343 while (Carry.getOpcode() == ISD::UADDO_CARRY)
4344 Carry = Carry.getOperand(2);
4345 return Carry.getOpcode() == ISD::UADDO;
4346}
4347
4348static bool isSubBorrowChain(SDValue Carry) {
4349 while (Carry.getOpcode() == ISD::USUBO_CARRY)
4350 Carry = Carry.getOperand(2);
4351 return Carry.getOpcode() == ISD::USUBO;
4352}
4353
4354// Lower UADDO_CARRY/USUBO_CARRY nodes.
4355SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4356 SelectionDAG &DAG) const {
4357
4358 SDNode *N = Op.getNode();
4359 MVT VT = N->getSimpleValueType(0);
4360
4361 // Let legalize expand this if it isn't a legal type yet.
4362 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4363 return SDValue();
4364
4365 SDValue LHS = N->getOperand(0);
4366 SDValue RHS = N->getOperand(1);
4367 SDValue Carry = Op.getOperand(2);
4368 SDLoc DL(N);
4369
4370 if (VT == MVT::i128) {
4371 unsigned BaseOp = 0;
4372 unsigned FlagOp = 0;
4373 bool IsBorrow = false;
4374 switch (Op.getOpcode()) {
4375 default: llvm_unreachable("Unknown instruction!");
4376 case ISD::UADDO_CARRY:
4377 BaseOp = SystemZISD::VAC;
4378 FlagOp = SystemZISD::VACCC;
4379 break;
4380 case ISD::USUBO_CARRY:
4381 BaseOp = SystemZISD::VSBI;
4382 FlagOp = SystemZISD::VSBCBI;
4383 IsBorrow = true;
4384 break;
4385 }
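// For subtraction the incoming borrow must first be inverted to the
// machine's "no borrow" convention expected by VSBI/VSBCBI, and the
// outgoing indication is inverted back into a borrow flag below.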
4386 if (IsBorrow)
4387 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4388 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4389 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4390 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4391 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4392 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4393 DAG.getValueType(MVT::i1));
4394 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4395 if (IsBorrow)
4396 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4397 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4398 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4399 }
4400
4401 unsigned BaseOp = 0;
4402 unsigned CCValid = 0;
4403 unsigned CCMask = 0;
4404
4405 switch (Op.getOpcode()) {
4406 default: llvm_unreachable("Unknown instruction!");
4407 case ISD::UADDO_CARRY:
4408 if (!isAddCarryChain(Carry))
4409 return SDValue();
4410
4411 BaseOp = SystemZISD::ADDCARRY;
4412 CCValid = SystemZ::CCMASK_LOGICAL;
4413 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4414 break;
4415 case ISD::USUBO_CARRY:
4416 if (!isSubBorrowChain(Carry))
4417 return SDValue();
4418
4419 BaseOp = SystemZISD::SUBCARRY;
4420 CCValid = SystemZ::CCMASK_LOGICAL;
4421 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4422 break;
4423 }
4424
4425 // Set the condition code from the carry flag.
4426 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4427 DAG.getConstant(CCValid, DL, MVT::i32),
4428 DAG.getConstant(CCMask, DL, MVT::i32));
4429
4430 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4431 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4432
4433 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4434 if (N->getValueType(1) == MVT::i1)
4435 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4436
4437 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4438}
4439
4440SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4441 SelectionDAG &DAG) const {
4442 EVT VT = Op.getValueType();
4443 SDLoc DL(Op);
4444 Op = Op.getOperand(0);
4445
4446 if (VT.getScalarSizeInBits() == 128) {
4447 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4448 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4449 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4450 DAG.getConstant(0, DL, MVT::i64));
4451 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4452 return Op;
4453 }
4454
4455 // Handle vector types via VPOPCT.
4456 if (VT.isVector()) {
4457 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4458 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4459 switch (VT.getScalarSizeInBits()) {
4460 case 8:
4461 break;
4462 case 16: {
4463 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4464 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4465 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4466 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4467 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4468 break;
4469 }
4470 case 32: {
4471 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4472 DAG.getConstant(0, DL, MVT::i32));
4473 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4474 break;
4475 }
4476 case 64: {
4477 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4478 DAG.getConstant(0, DL, MVT::i32));
4479 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4480 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4481 break;
4482 }
4483 default:
4484 llvm_unreachable("Unexpected type");
4485 }
4486 return Op;
4487 }
4488
4489 // Get the known-zero mask for the operand.
4490 KnownBits Known = DAG.computeKnownBits(Op);
4491 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
4492 if (NumSignificantBits == 0)
4493 return DAG.getConstant(0, DL, VT);
4494
4495 // Skip known-zero high parts of the operand.
4496 int64_t OrigBitSize = VT.getSizeInBits();
4497 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
4498 BitSize = std::min(BitSize, OrigBitSize);
4499
4500 // The POPCNT instruction counts the number of bits in each byte.
4501 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
4502 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
4503 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4504
4505 // Add up per-byte counts in a binary tree. All bits of Op at
4506 // position larger than BitSize remain zero throughout.
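// E.g. for i32 with per-byte counts c3..c0, adding (Op << 16) and then
// (Op << 8) leaves c3+c2+c1+c0 in the top byte, which is extracted below.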
4507 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
4508 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
4509 if (BitSize != OrigBitSize)
4510 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
4511 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
4512 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4513 }
4514
4515 // Extract overall result from high byte.
4516 if (BitSize > 8)
4517 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4518 DAG.getConstant(BitSize - 8, DL, VT));
4519
4520 return Op;
4521}
4522
4523SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4524 SelectionDAG &DAG) const {
4525 SDLoc DL(Op);
4526 AtomicOrdering FenceOrdering =
4527 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4528 SyncScope::ID FenceSSID =
4529 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4530
4531 // The only fence that needs an instruction is a sequentially-consistent
4532 // cross-thread fence.
4533 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4534 FenceSSID == SyncScope::System) {
4535 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
4536 Op.getOperand(0)),
4537 0);
4538 }
4539
4540 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4541 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4542}
4543
4544SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
4545 SelectionDAG &DAG) const {
4546 auto *Node = cast<AtomicSDNode>(Op.getNode());
4547 assert(
4548 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
4549 "Only custom lowering i128 or f128.");
4550 // Use same code to handle both legal and non-legal i128 types.
4551 SmallVector<SDValue, 2> Results;
4552 LowerOperationWrapper(Node, Results, DAG);
4553 return DAG.getMergeValues(Results, SDLoc(Op));
4554}
4555
4556// Prepare for a Compare And Swap for a subword operation. This needs to be
4557// done in memory with 4 bytes at natural alignment.
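// E.g. for a halfword at byte offset 2 of its (big-endian) containing word,
// AlignedAddr is Addr & -4 and BitShift is 16: rotating the loaded word left
// by 16 brings the field into the top bits, and NegBitShift undoes it.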
4558 static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
4559 SDValue &AlignedAddr, SDValue &BitShift,
4560 SDValue &NegBitShift) {
4561 EVT PtrVT = Addr.getValueType();
4562 EVT WideVT = MVT::i32;
4563
4564 // Get the address of the containing word.
4565 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4566 DAG.getConstant(-4, DL, PtrVT));
4567
4568 // Get the number of bits that the word must be rotated left in order
4569 // to bring the field to the top bits of a GR32.
4570 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4571 DAG.getConstant(3, DL, PtrVT));
4572 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4573
4574 // Get the complementing shift amount, for rotating a field in the top
4575 // bits back to its proper position.
4576 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4577 DAG.getConstant(0, DL, WideVT), BitShift);
4578
4579}
4580
4581// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
4582// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
4583SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
4584 SelectionDAG &DAG,
4585 unsigned Opcode) const {
4586 auto *Node = cast<AtomicSDNode>(Op.getNode());
4587
4588 // 32-bit operations need no special handling.
4589 EVT NarrowVT = Node->getMemoryVT();
4590 EVT WideVT = MVT::i32;
4591 if (NarrowVT == WideVT)
4592 return Op;
4593
4594 int64_t BitSize = NarrowVT.getSizeInBits();
4595 SDValue ChainIn = Node->getChain();
4596 SDValue Addr = Node->getBasePtr();
4597 SDValue Src2 = Node->getVal();
4598 MachineMemOperand *MMO = Node->getMemOperand();
4599 SDLoc DL(Node);
4600
4601 // Convert atomic subtracts of constants into additions.
4602 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4603 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4604 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
4605 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
4606 }
4607
4608 SDValue AlignedAddr, BitShift, NegBitShift;
4609 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4610
4611 // Extend the source operand to 32 bits and prepare it for the inner loop.
4612 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
4613 // operations require the source to be shifted in advance. (This shift
4614 // can be folded if the source is constant.) For AND and NAND, the lower
4615 // bits must be set, while for other opcodes they should be left clear.
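// The low 32-BitSize bits line up with the neighbouring bytes of the
// containing word after the rotate, so they must hold the identity value
// for the operation (all ones for AND/NAND, zeros otherwise).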
4616 if (Opcode != SystemZISD::ATOMIC_SWAPW)
4617 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
4618 DAG.getConstant(32 - BitSize, DL, WideVT));
4619 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
4620 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
4621 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
4622 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
4623
4624 // Construct the ATOMIC_LOADW_* node.
4625 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
4626 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4627 DAG.getConstant(BitSize, DL, WideVT) };
4628 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4629 NarrowVT, MMO);
4630
4631 // Rotate the result of the final CS so that the field is in the lower
4632 // bits of a GR32, then truncate it.
4633 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
4634 DAG.getConstant(BitSize, DL, WideVT));
4635 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
4636
4637 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
4638 return DAG.getMergeValues(RetOps, DL);
4639}
4640
4641// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
4642// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
4643SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
4644 SelectionDAG &DAG) const {
4645 auto *Node = cast<AtomicSDNode>(Op.getNode());
4646 EVT MemVT = Node->getMemoryVT();
4647 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
4648 // A full-width operation: negate and use LAA(G).
4649 assert(Op.getValueType() == MemVT && "Mismatched VTs");
4650 assert(Subtarget.hasInterlockedAccess1() &&
4651 "Should have been expanded by AtomicExpand pass.");
4652 SDValue Src2 = Node->getVal();
4653 SDLoc DL(Src2);
4654 SDValue NegSrc2 =
4655 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
4656 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
4657 Node->getChain(), Node->getBasePtr(), NegSrc2,
4658 Node->getMemOperand());
4659 }
4660
4661 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
4662}
4663
4664// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
4665SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
4666 SelectionDAG &DAG) const {
4667 auto *Node = cast<AtomicSDNode>(Op.getNode());
4668 SDValue ChainIn = Node->getOperand(0);
4669 SDValue Addr = Node->getOperand(1);
4670 SDValue CmpVal = Node->getOperand(2);
4671 SDValue SwapVal = Node->getOperand(3);
4672 MachineMemOperand *MMO = Node->getMemOperand();
4673 SDLoc DL(Node);
4674
4675 if (Node->getMemoryVT() == MVT::i128) {
4676 // Use same code to handle both legal and non-legal i128 types.
4677 SmallVector<SDValue, 3> Results;
4678 LowerOperationWrapper(Node, Results, DAG);
4679 return DAG.getMergeValues(Results, DL);
4680 }
4681
4682 // We have native support for 32-bit and 64-bit compare and swap, but we
4683 // still need to expand extracting the "success" result from the CC.
4684 EVT NarrowVT = Node->getMemoryVT();
4685 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4686 if (NarrowVT == WideVT) {
4687 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4688 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
4689 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
4690 DL, Tys, Ops, NarrowVT, MMO);
4691 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4692 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
4693
4694 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4695 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4696 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4697 return SDValue();
4698 }
4699
4700 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4701 // via a fullword ATOMIC_CMP_SWAPW operation.
4702 int64_t BitSize = NarrowVT.getSizeInBits();
4703
4704 SDValue AlignedAddr, BitShift, NegBitShift;
4705 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4706
4707 // Construct the ATOMIC_CMP_SWAPW node.
4708 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4709 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
4710 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
4711 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
4712 VTList, Ops, NarrowVT, MMO);
4713 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4714 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
4715
4716 // emitAtomicCmpSwapW() will zero extend the result (original value).
4717 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
4718 DAG.getValueType(NarrowVT));
4719 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
4720 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4721 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4722 return SDValue();
4723}
4724
4725 MachineMemOperand::Flags
4726 SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
4727 // Because of how we convert atomic_load and atomic_store to normal loads and
4728 // stores in the DAG, we need to ensure that the MMOs are marked volatile
4729 // since DAGCombine hasn't been updated to account for atomic, but
4730 // non-volatile, loads. (See D57601)
4731 if (auto *SI = dyn_cast<StoreInst>(&I))
4732 if (SI->isAtomic())
4733 return MachineMemOperand::MOVolatile;
4734 if (auto *LI = dyn_cast<LoadInst>(&I))
4735 if (LI->isAtomic())
4736 return MachineMemOperand::MOVolatile;
4737 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
4738 if (AI->isAtomic())
4739 return MachineMemOperand::MOVolatile;
4740 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
4741 if (AI->isAtomic())
4742 return MachineMemOperand::MOVolatile;
4743 return MachineMemOperand::MONone;
4744}
4745
4746SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
4747 SelectionDAG &DAG) const {
4748 MachineFunction &MF = DAG.getMachineFunction();
4749 auto *Regs = Subtarget.getSpecialRegisters();
4750 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4751 report_fatal_error("Variable-sized stack allocations are not supported "
4752 "in GHC calling convention");
4753 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
4754 Regs->getStackPointerRegister(), Op.getValueType());
4755}
4756
4757SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
4758 SelectionDAG &DAG) const {
4759 MachineFunction &MF = DAG.getMachineFunction();
4760 auto *Regs = Subtarget.getSpecialRegisters();
4761 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4762
4763 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4764 report_fatal_error("Variable-sized stack allocations are not supported "
4765 "in GHC calling convention");
4766
4767 SDValue Chain = Op.getOperand(0);
4768 SDValue NewSP = Op.getOperand(1);
4769 SDValue Backchain;
4770 SDLoc DL(Op);
4771
4772 if (StoreBackchain) {
4773 SDValue OldSP = DAG.getCopyFromReg(
4774 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
4775 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4776 MachinePointerInfo());
4777 }
4778
4779 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
4780
4781 if (StoreBackchain)
4782 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4783 MachinePointerInfo());
4784
4785 return Chain;
4786}
4787
4788SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
4789 SelectionDAG &DAG) const {
4790 bool IsData = Op.getConstantOperandVal(4);
4791 if (!IsData)
4792 // Just preserve the chain.
4793 return Op.getOperand(0);
4794
4795 SDLoc DL(Op);
4796 bool IsWrite = Op.getConstantOperandVal(2);
4797 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
4798 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
4799 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
4800 Op.getOperand(1)};
4801 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
4802 Node->getVTList(), Ops,
4803 Node->getMemoryVT(), Node->getMemOperand());
4804}
4805
4806// Convert condition code in CCReg to an i32 value.
4807 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
4808 SDLoc DL(CCReg);
4809 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
4810 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
4811 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
4812}
4813
4814SDValue
4815SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4816 SelectionDAG &DAG) const {
4817 unsigned Opcode, CCValid;
4818 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
4819 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
4820 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
4821 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
4822 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
4823 return SDValue();
4824 }
4825
4826 return SDValue();
4827}
4828
4829SDValue
4830SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4831 SelectionDAG &DAG) const {
4832 unsigned Opcode, CCValid;
4833 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
4834 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
4835 if (Op->getNumValues() == 1)
4836 return getCCResult(DAG, SDValue(Node, 0));
4837 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
4838 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
4839 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
4840 }
4841
4842 unsigned Id = Op.getConstantOperandVal(0);
4843 switch (Id) {
4844 case Intrinsic::thread_pointer:
4845 return lowerThreadPointer(SDLoc(Op), DAG);
4846
4847 case Intrinsic::s390_vpdi:
4848 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
4849 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4850
4851 case Intrinsic::s390_vperm:
4852 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
4853 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4854
4855 case Intrinsic::s390_vuphb:
4856 case Intrinsic::s390_vuphh:
4857 case Intrinsic::s390_vuphf:
4858 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
4859 Op.getOperand(1));
4860
4861 case Intrinsic::s390_vuplhb:
4862 case Intrinsic::s390_vuplhh:
4863 case Intrinsic::s390_vuplhf:
4864 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
4865 Op.getOperand(1));
4866
4867 case Intrinsic::s390_vuplb:
4868 case Intrinsic::s390_vuplhw:
4869 case Intrinsic::s390_vuplf:
4870 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
4871 Op.getOperand(1));
4872
4873 case Intrinsic::s390_vupllb:
4874 case Intrinsic::s390_vupllh:
4875 case Intrinsic::s390_vupllf:
4876 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
4877 Op.getOperand(1));
4878
4879 case Intrinsic::s390_vsumb:
4880 case Intrinsic::s390_vsumh:
4881 case Intrinsic::s390_vsumgh:
4882 case Intrinsic::s390_vsumgf:
4883 case Intrinsic::s390_vsumqf:
4884 case Intrinsic::s390_vsumqg:
4885 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
4886 Op.getOperand(1), Op.getOperand(2));
4887
4888 case Intrinsic::s390_vaq:
4889 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
4890 Op.getOperand(1), Op.getOperand(2));
4891 case Intrinsic::s390_vaccb:
4892 case Intrinsic::s390_vacch:
4893 case Intrinsic::s390_vaccf:
4894 case Intrinsic::s390_vaccg:
4895 case Intrinsic::s390_vaccq:
4896 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
4897 Op.getOperand(1), Op.getOperand(2));
4898 case Intrinsic::s390_vacq:
4899 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
4900 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4901 case Intrinsic::s390_vacccq:
4902 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
4903 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4904
4905 case Intrinsic::s390_vsq:
4906 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
4907 Op.getOperand(1), Op.getOperand(2));
4908 case Intrinsic::s390_vscbib:
4909 case Intrinsic::s390_vscbih:
4910 case Intrinsic::s390_vscbif:
4911 case Intrinsic::s390_vscbig:
4912 case Intrinsic::s390_vscbiq:
4913 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
4914 Op.getOperand(1), Op.getOperand(2));
4915 case Intrinsic::s390_vsbiq:
4916 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
4917 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4918 case Intrinsic::s390_vsbcbiq:
4919 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
4920 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4921 }
4922
4923 return SDValue();
4924}
4925
4926namespace {
4927// Says that SystemZISD operation Opcode can be used to perform the equivalent
4928// of a VPERM with permute vector Bytes. If Opcode takes three operands,
4929// Operand is the constant third operand, otherwise it is the number of
4930// bytes in each element of the result.
4931struct Permute {
4932 unsigned Opcode;
4933 unsigned Operand;
4934 unsigned char Bytes[SystemZ::VectorBytes];
4935};
4936}
4937
4938static const Permute PermuteForms[] = {
4939 // VMRHG
4940 { SystemZISD::MERGE_HIGH, 8,
4941 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4942 // VMRHF
4943 { SystemZISD::MERGE_HIGH, 4,
4944 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4945 // VMRHH
4946 { SystemZISD::MERGE_HIGH, 2,
4947 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4948 // VMRHB
4949 { SystemZISD::MERGE_HIGH, 1,
4950 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4951 // VMRLG
4952 { SystemZISD::MERGE_LOW, 8,
4953 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4954 // VMRLF
4955 { SystemZISD::MERGE_LOW, 4,
4956 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4957 // VMRLH
4958 { SystemZISD::MERGE_LOW, 2,
4959 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4960 // VMRLB
4961 { SystemZISD::MERGE_LOW, 1,
4962 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4963 // VPKG
4964 { SystemZISD::PACK, 4,
4965 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4966 // VPKF
4967 { SystemZISD::PACK, 2,
4968 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4969 // VPKH
4970 { SystemZISD::PACK, 1,
4971 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4972 // VPDI V1, V2, 4 (low half of V1, high half of V2)
4973 { SystemZISD::PERMUTE_DWORDS, 4,
4974 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4975 // VPDI V1, V2, 1 (high half of V1, low half of V2)
4976 { SystemZISD::PERMUTE_DWORDS, 1,
4977 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4978};
4979
4980// Called after matching a vector shuffle against a particular pattern.
4981// Both the original shuffle and the pattern have two vector operands.
4982// OpNos[0] is the operand of the original shuffle that should be used for
4983// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4984// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
4985// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4986// for operands 0 and 1 of the pattern.
4987static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
4988 if (OpNos[0] < 0) {
4989 if (OpNos[1] < 0)
4990 return false;
4991 OpNo0 = OpNo1 = OpNos[1];
4992 } else if (OpNos[1] < 0) {
4993 OpNo0 = OpNo1 = OpNos[0];
4994 } else {
4995 OpNo0 = OpNos[0];
4996 OpNo1 = OpNos[1];
4997 }
4998 return true;
4999}
5000
5001// Bytes is a VPERM-like permute vector, except that -1 is used for
5002// undefined bytes. Return true if the VPERM can be implemented using P.
5003// When returning true set OpNo0 to the VPERM operand that should be
5004// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5005//
5006// For example, if swapping the VPERM operands allows P to match, OpNo0
5007// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5008// operand, but rewriting it to use two duplicated operands allows it to
5009// match P, then OpNo0 and OpNo1 will be the same.
5010static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5011 unsigned &OpNo0, unsigned &OpNo1) {
5012 int OpNos[] = { -1, -1 };
5013 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5014 int Elt = Bytes[I];
5015 if (Elt >= 0) {
5016 // Make sure that the two permute vectors use the same suboperand
5017 // byte number. Only the operand numbers (the high bits) are
5018 // allowed to differ.
5019 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5020 return false;
5021 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5022 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5023 // Make sure that the operand mappings are consistent with previous
5024 // elements.
5025 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5026 return false;
5027 OpNos[ModelOpNo] = RealOpNo;
5028 }
5029 }
5030 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5031}
5032
5033// As above, but search for a matching permute.
5034static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5035 unsigned &OpNo0, unsigned &OpNo1) {
5036 for (auto &P : PermuteForms)
5037 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5038 return &P;
5039 return nullptr;
5040}
5041
5042// Bytes is a VPERM-like permute vector, except that -1 is used for
5043// undefined bytes. This permute is an operand of an outer permute.
5044// See whether redistributing the -1 bytes gives a shuffle that can be
5045// implemented using P. If so, set Transform to a VPERM-like permute vector
5046// that, when applied to the result of P, gives the original permute in Bytes.
5047 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5048 const Permute &P,
5049 SmallVectorImpl<int> &Transform) {
5050 unsigned To = 0;
5051 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5052 int Elt = Bytes[From];
5053 if (Elt < 0)
5054 // Byte number From of the result is undefined.
5055 Transform[From] = -1;
5056 else {
5057 while (P.Bytes[To] != Elt) {
5058 To += 1;
5059 if (To == SystemZ::VectorBytes)
5060 return false;
5061 }
5062 Transform[From] = To;
5063 }
5064 }
5065 return true;
5066}
5067
5068// As above, but search for a matching permute.
5069static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5070 SmallVectorImpl<int> &Transform) {
5071 for (auto &P : PermuteForms)
5072 if (matchDoublePermute(Bytes, P, Transform))
5073 return &P;
5074 return nullptr;
5075}
5076
5077// Convert the mask of the given shuffle op into a byte-level mask,
5078// as if it had type vNi8.
5079static bool getVPermMask(SDValue ShuffleOp,
5080 SmallVectorImpl<int> &Bytes) {
5081 EVT VT = ShuffleOp.getValueType();
5082 unsigned NumElements = VT.getVectorNumElements();
5083 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5084
5085 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5086 Bytes.resize(NumElements * BytesPerElement, -1);
5087 for (unsigned I = 0; I < NumElements; ++I) {
5088 int Index = VSN->getMaskElt(I);
5089 if (Index >= 0)
5090 for (unsigned J = 0; J < BytesPerElement; ++J)
5091 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5092 }
5093 return true;
5094 }
5095 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5096 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5097 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5098 Bytes.resize(NumElements * BytesPerElement, -1);
5099 for (unsigned I = 0; I < NumElements; ++I)
5100 for (unsigned J = 0; J < BytesPerElement; ++J)
5101 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5102 return true;
5103 }
5104 return false;
5105}
5106
5107// Bytes is a VPERM-like permute vector, except that -1 is used for
5108// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5109// the result come from a contiguous sequence of bytes from one input.
5110// Set Base to the selector for the first byte if so.
5111static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5112 unsigned BytesPerElement, int &Base) {
5113 Base = -1;
5114 for (unsigned I = 0; I < BytesPerElement; ++I) {
5115 if (Bytes[Start + I] >= 0) {
5116 unsigned Elem = Bytes[Start + I];
5117 if (Base < 0) {
5118 Base = Elem - I;
5119 // Make sure the bytes would come from one input operand.
5120 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5121 return false;
5122 } else if (unsigned(Base) != Elem - I)
5123 return false;
5124 }
5125 }
5126 return true;
5127}
5128
5129// Bytes is a VPERM-like permute vector, except that -1 is used for
5130// undefined bytes. Return true if it can be performed using VSLDB.
5131// When returning true, set StartIndex to the shift amount and OpNo0
5132// and OpNo1 to the VPERM operands that should be used as the first
5133// and second shift operand respectively.
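// VSLDB selects 16 consecutive bytes starting at byte StartIndex from the
// 32-byte concatenation of the two operands, which is why every defined
// byte must agree on the same (Index - I) shift amount.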
5134 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5135 unsigned &StartIndex, unsigned &OpNo0,
5136 unsigned &OpNo1) {
5137 int OpNos[] = { -1, -1 };
5138 int Shift = -1;
5139 for (unsigned I = 0; I < 16; ++I) {
5140 int Index = Bytes[I];
5141 if (Index >= 0) {
5142 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5143 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5144 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5145 if (Shift < 0)
5146 Shift = ExpectedShift;
5147 else if (Shift != ExpectedShift)
5148 return false;
5149 // Make sure that the operand mappings are consistent with previous
5150 // elements.
5151 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5152 return false;
5153 OpNos[ModelOpNo] = RealOpNo;
5154 }
5155 }
5156 StartIndex = Shift;
5157 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5158}
5159
5160// Create a node that performs P on operands Op0 and Op1, casting the
5161// operands to the appropriate type. The type of the result is determined by P.
5162static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5163 const Permute &P, SDValue Op0, SDValue Op1) {
5164 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5165 // elements of a PACK are twice as wide as the outputs.
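 // For example, a PACK form with P.Operand == 2 casts both inputs to
 // v4i32 and produces a v8i16 result.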
5166 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5167 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5168 P.Operand);
5169 // Cast both operands to the appropriate type.
5170 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5171 SystemZ::VectorBytes / InBytes);
5172 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5173 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5174 SDValue Op;
5175 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5176 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5177 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5178 } else if (P.Opcode == SystemZISD::PACK) {
5179 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5180 SystemZ::VectorBytes / P.Operand);
5181 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5182 } else {
5183 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5184 }
5185 return Op;
5186}
5187
5188static bool isZeroVector(SDValue N) {
5189 if (N->getOpcode() == ISD::BITCAST)
5190 N = N->getOperand(0);
5191 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5192 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5193 return Op->getZExtValue() == 0;
5194 return ISD::isBuildVectorAllZeros(N.getNode());
5195}
5196
5197// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5198static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5199 for (unsigned I = 0; I < Num ; I++)
5200 if (isZeroVector(Ops[I]))
5201 return I;
5202 return UINT32_MAX;
5203}
5204
5205// Bytes is a VPERM-like permute vector, except that -1 is used for
5206// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5207// VSLDB or VPERM.
5208static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5209 SDValue *Ops,
5210 const SmallVectorImpl<int> &Bytes) {
5211 for (unsigned I = 0; I < 2; ++I)
5212 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5213
5214 // First see whether VSLDB can be used.
5215 unsigned StartIndex, OpNo0, OpNo1;
5216 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5217 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5218 Ops[OpNo1],
5219 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5220
5221 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5222 // eliminate a zero vector by reusing any zero index in the permute vector.
5223 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5224 if (ZeroVecIdx != UINT32_MAX) {
5225 bool MaskFirst = true;
5226 int ZeroIdx = -1;
5227 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5228 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5229 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5230 if (OpNo == ZeroVecIdx && I == 0) {
5231 // If the first byte is zero, use mask as first operand.
5232 ZeroIdx = 0;
5233 break;
5234 }
5235 if (OpNo != ZeroVecIdx && Byte == 0) {
5236 // If mask contains a zero, use it by placing that vector first.
5237 ZeroIdx = I + SystemZ::VectorBytes;
5238 MaskFirst = false;
5239 break;
5240 }
5241 }
5242 if (ZeroIdx != -1) {
5243 SDValue IndexNodes[SystemZ::VectorBytes];
5244 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5245 if (Bytes[I] >= 0) {
5246 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5247 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5248 if (OpNo == ZeroVecIdx)
5249 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5250 else {
5251 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5252 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5253 }
5254 } else
5255 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5256 }
5257 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5258 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5259 if (MaskFirst)
5260 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5261 Mask);
5262 else
5263 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5264 Mask);
5265 }
5266 }
5267
5268 SDValue IndexNodes[SystemZ::VectorBytes];
5269 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5270 if (Bytes[I] >= 0)
5271 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5272 else
5273 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5274 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5275 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5276 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5277}
5278
5279namespace {
5280// Describes a general N-operand vector shuffle.
5281struct GeneralShuffle {
5282 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
5283 void addUndef();
5284 bool add(SDValue, unsigned);
5285 SDValue getNode(SelectionDAG &, const SDLoc &);
5286 void tryPrepareForUnpack();
5287 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5288 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5289
5290 // The operands of the shuffle.
5291 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5292
5293 // Index I is -1 if byte I of the result is undefined. Otherwise the
5294 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5295 // Bytes[I] / SystemZ::VectorBytes.
5296 SmallVector<int, SystemZ::VectorBytes> Bytes;
5297
5298 // The type of the shuffle result.
5299 EVT VT;
5300
5301 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5302 unsigned UnpackFromEltSize;
5303};
5304}
5305
5306// Add an extra undefined element to the shuffle.
5307void GeneralShuffle::addUndef() {
5308 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5309 for (unsigned I = 0; I < BytesPerElement; ++I)
5310 Bytes.push_back(-1);
5311}
5312
5313// Add an extra element to the shuffle, taking it from element Elem of Op.
5314// A null Op indicates a vector input whose value will be calculated later;
5315// there is at most one such input per shuffle and it always has the same
5316// type as the result. Aborts and returns false if the source vector elements
5317// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
5318// LLVM they become implicitly extended, but this is rare and not optimized.
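// For example, adding element 1 of a v2i64 operand to a shuffle with i32
// result elements selects byte 12, the least-significant (big-endian) word
// of that doubleword.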
5319bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5320 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5321
5322 // The source vector can have wider elements than the result,
5323 // either through an explicit TRUNCATE or because of type legalization.
5324 // We want the least significant part.
5325 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5326 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5327
5328 // Return false if the source elements are smaller than their destination
5329 // elements.
5330 if (FromBytesPerElement < BytesPerElement)
5331 return false;
5332
5333 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5334 (FromBytesPerElement - BytesPerElement));
5335
5336 // Look through things like shuffles and bitcasts.
5337 while (Op.getNode()) {
5338 if (Op.getOpcode() == ISD::BITCAST)
5339 Op = Op.getOperand(0);
5340 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5341 // See whether the bytes we need come from a contiguous part of one
5342 // operand.
5343 SmallVector<int, SystemZ::VectorBytes> OpBytes;
5344 if (!getVPermMask(Op, OpBytes))
5345 break;
5346 int NewByte;
5347 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5348 break;
5349 if (NewByte < 0) {
5350 addUndef();
5351 return true;
5352 }
5353 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
5354 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
5355 } else if (Op.isUndef()) {
5356 addUndef();
5357 return true;
5358 } else
5359 break;
5360 }
5361
5362 // Make sure that the source of the extraction is in Ops.
5363 unsigned OpNo = 0;
5364 for (; OpNo < Ops.size(); ++OpNo)
5365 if (Ops[OpNo] == Op)
5366 break;
5367 if (OpNo == Ops.size())
5368 Ops.push_back(Op);
5369
5370 // Add the element to Bytes.
5371 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
5372 for (unsigned I = 0; I < BytesPerElement; ++I)
5373 Bytes.push_back(Base + I);
5374
5375 return true;
5376}
5377
5378// Return SDNodes for the completed shuffle.
5379SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
5380 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
5381
5382 if (Ops.size() == 0)
5383 return DAG.getUNDEF(VT);
5384
5385 // Use a single unpack if possible as the last operation.
5386 tryPrepareForUnpack();
5387
5388 // Make sure that there are at least two shuffle operands.
5389 if (Ops.size() == 1)
5390 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
5391
5392 // Create a tree of shuffles, deferring root node until after the loop.
5393 // Try to redistribute the undefined elements of non-root nodes so that
5394 // the non-root shuffles match something like a pack or merge, then adjust
5395 // the parent node's permute vector to compensate for the new order.
5396 // Among other things, this copes with vectors like <2 x i16> that were
5397 // padded with undefined elements during type legalization.
5398 //
5399 // In the best case this redistribution will lead to the whole tree
5400 // using packs and merges. It should rarely be a loss in other cases.
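 // For example, with four operands the first pass combines Ops[0]/Ops[1]
 // and Ops[2]/Ops[3] into two intermediate results, which the final
 // permute below then combines.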
5401 unsigned Stride = 1;
5402 for (; Stride * 2 < Ops.size(); Stride *= 2) {
5403 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
5404 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
5405
5406 // Create a mask for just these two operands.
5407 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
5408 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5409 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
5410 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
5411 if (OpNo == I)
5412 NewBytes[J] = Byte;
5413 else if (OpNo == I + Stride)
5414 NewBytes[J] = SystemZ::VectorBytes + Byte;
5415 else
5416 NewBytes[J] = -1;
5417 }
5418 // See if it would be better to reorganize NewMask to avoid using VPERM.
5419 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
5420 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
5421 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
5422 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
5423 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5424 if (NewBytes[J] >= 0) {
5425 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
5426 "Invalid double permute");
5427 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
5428 } else
5429 assert(NewBytesMap[J] < 0 && "Invalid double permute");
5430 }
5431 } else {
5432 // Just use NewBytes on the operands.
5433 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
5434 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
5435 if (NewBytes[J] >= 0)
5436 Bytes[J] = I * SystemZ::VectorBytes + J;
5437 }
5438 }
5439 }
5440
5441 // Now we just have 2 inputs. Put the second operand in Ops[1].
5442 if (Stride > 1) {
5443 Ops[1] = Ops[Stride];
5444 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5445 if (Bytes[I] >= int(SystemZ::VectorBytes))
5446 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
5447 }
5448
5449 // Look for an instruction that can do the permute without resorting
5450 // to VPERM.
5451 unsigned OpNo0, OpNo1;
5452 SDValue Op;
5453 if (unpackWasPrepared() && Ops[1].isUndef())
5454 Op = Ops[0];
5455 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
5456 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
5457 else
5458 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
5459
5460 Op = insertUnpackIfPrepared(DAG, DL, Op);
5461
5462 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5463}
5464
5465#ifndef NDEBUG
5466static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
5467 dbgs() << Msg.c_str() << " { ";
5468 for (unsigned i = 0; i < Bytes.size(); i++)
5469 dbgs() << Bytes[i] << " ";
5470 dbgs() << "}\n";
5471}
5472#endif
5473
5474// If the Bytes vector matches an unpack operation, prepare to do the unpack
5475// after all else by removing the zero vector and the effect of the unpack on
5476// Bytes.
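// For example, with UnpackFromEltSize == 1 every even-numbered result byte
// must come from the zero vector; the odd-numbered bytes are then produced
// by unpacking the first eight bytes of the remaining shuffle result.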
5477void GeneralShuffle::tryPrepareForUnpack() {
5478 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
5479 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
5480 return;
5481
5482 // Only do this if removing the zero vector reduces the depth, otherwise
5483 // the critical path will increase with the final unpack.
5484 if (Ops.size() > 2 &&
5485 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
5486 return;
5487
5488 // Find an unpack that would allow removing the zero vector from Ops.
5489 UnpackFromEltSize = 1;
5490 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
5491 bool MatchUnpack = true;
5492 SmallVector<int, SystemZ::VectorBytes> SrcBytes;
5493 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
5494 unsigned ToEltSize = UnpackFromEltSize * 2;
5495 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
5496 if (!IsZextByte)
5497 SrcBytes.push_back(Bytes[Elt]);
5498 if (Bytes[Elt] != -1) {
5499 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
5500 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
5501 MatchUnpack = false;
5502 break;
5503 }
5504 }
5505 }
5506 if (MatchUnpack) {
5507 if (Ops.size() == 2) {
5508 // Don't use unpack if a single source operand needs rearrangement.
5509 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
5510 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
5511 UnpackFromEltSize = UINT_MAX;
5512 return;
5513 }
5514 }
5515 break;
5516 }
5517 }
5518 if (UnpackFromEltSize > 4)
5519 return;
5520
5521 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
5522 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
5523 << ".\n";
5524 dumpBytes(Bytes, "Original Bytes vector:"););
5525
5526 // Apply the unpack in reverse to the Bytes array.
5527 unsigned B = 0;
5528 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
5529 Elt += UnpackFromEltSize;
5530 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
5531 Bytes[B] = Bytes[Elt];
5532 }
5533 while (B < SystemZ::VectorBytes)
5534 Bytes[B++] = -1;
5535
5536 // Remove the zero vector from Ops
5537 Ops.erase(&Ops[ZeroVecOpNo]);
5538 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5539 if (Bytes[I] >= 0) {
5540 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5541 if (OpNo > ZeroVecOpNo)
5542 Bytes[I] -= SystemZ::VectorBytes;
5543 }
5544
5545 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
5546 dbgs() << "\n";);
5547}
5548
5549SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
5550 const SDLoc &DL,
5551 SDValue Op) {
5552 if (!unpackWasPrepared())
5553 return Op;
5554 unsigned InBits = UnpackFromEltSize * 8;
5555 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
5556 SystemZ::VectorBits / InBits);
5557 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
5558 unsigned OutBits = InBits * 2;
5559 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
5560 SystemZ::VectorBits / OutBits);
5561 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
5562}
5563
5564// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
5565static bool isScalarToVector(SDValue Op) {
5566 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
5567 if (!Op.getOperand(I).isUndef())
5568 return false;
5569 return true;
5570}
5571
5572// Return a vector of type VT that contains Value in the first element.
5573// The other elements don't matter.
5574static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5575 SDValue Value) {
5576 // If we have a constant, replicate it to all elements and let the
5577 // BUILD_VECTOR lowering take care of it.
5578 if (Value.getOpcode() == ISD::Constant ||
5579 Value.getOpcode() == ISD::ConstantFP) {
5580 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
5581 return DAG.getBuildVector(VT, DL, Ops);
5582 }
5583 if (Value.isUndef())
5584 return DAG.getUNDEF(VT);
5585 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
5586}
5587
5588// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
5589// element 1. Used for cases in which replication is cheap.
5590static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5591 SDValue Op0, SDValue Op1) {
5592 if (Op0.isUndef()) {
5593 if (Op1.isUndef())
5594 return DAG.getUNDEF(VT);
5595 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
5596 }
5597 if (Op1.isUndef())
5598 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
5599 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
5600 buildScalarToVector(DAG, DL, VT, Op0),
5601 buildScalarToVector(DAG, DL, VT, Op1));
5602}
5603
5604// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
5605// vector for them.
5606static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
5607 SDValue Op1) {
5608 if (Op0.isUndef() && Op1.isUndef())
5609 return DAG.getUNDEF(MVT::v2i64);
5610 // If one of the two inputs is undefined then replicate the other one,
5611 // in order to avoid using another register unnecessarily.
5612 if (Op0.isUndef())
5613 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5614 else if (Op1.isUndef())
5615 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5616 else {
5617 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5618 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5619 }
5620 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
5621}
5622
5623// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
5624// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
5625// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
5626// would benefit from this representation and return it if so.
5627static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
5628 BuildVectorSDNode *BVN) {
5629 EVT VT = BVN->getValueType(0);
5630 unsigned NumElements = VT.getVectorNumElements();
5631
5632 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
5633 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
5634 // need a BUILD_VECTOR, add an additional placeholder operand for that
5635 // BUILD_VECTOR and store its operands in ResidueOps.
5636 GeneralShuffle GS(VT);
5637 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
5638 bool FoundOne = false;
5639 for (unsigned I = 0; I < NumElements; ++I) {
5640 SDValue Op = BVN->getOperand(I);
5641 if (Op.getOpcode() == ISD::TRUNCATE)
5642 Op = Op.getOperand(0);
5643 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5644 Op.getOperand(1).getOpcode() == ISD::Constant) {
5645 unsigned Elem = Op.getConstantOperandVal(1);
5646 if (!GS.add(Op.getOperand(0), Elem))
5647 return SDValue();
5648 FoundOne = true;
5649 } else if (Op.isUndef()) {
5650 GS.addUndef();
5651 } else {
5652 if (!GS.add(SDValue(), ResidueOps.size()))
5653 return SDValue();
5654 ResidueOps.push_back(BVN->getOperand(I));
5655 }
5656 }
5657
5658 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
5659 if (!FoundOne)
5660 return SDValue();
5661
5662 // Create the BUILD_VECTOR for the remaining elements, if any.
5663 if (!ResidueOps.empty()) {
5664 while (ResidueOps.size() < NumElements)
5665 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
5666 for (auto &Op : GS.Ops) {
5667 if (!Op.getNode()) {
5668 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
5669 break;
5670 }
5671 }
5672 }
5673 return GS.getNode(DAG, SDLoc(BVN));
5674}
5675
5676bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
5677 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
5678 return true;
5679 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
5680 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
5681 return true;
5682 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
5683 return true;
5684 return false;
5685}
5686
5687// Combine GPR scalar values Elems into a vector of type VT.
5688SDValue
5689SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5690 SmallVectorImpl<SDValue> &Elems) const {
5691 // See whether there is a single replicated value.
5692 SDValue Single;
5693 unsigned int NumElements = Elems.size();
5694 unsigned int Count = 0;
5695 for (auto Elem : Elems) {
5696 if (!Elem.isUndef()) {
5697 if (!Single.getNode())
5698 Single = Elem;
5699 else if (Elem != Single) {
5700 Single = SDValue();
5701 break;
5702 }
5703 Count += 1;
5704 }
5705 }
5706 // There are three cases here:
5707 //
5708 // - if the only defined element is a loaded one, the best sequence
5709 // is a replicating load.
5710 //
5711 // - otherwise, if the only defined element is an i64 value, we will
5712 // end up with the same VLVGP sequence regardless of whether we short-cut
5713 // for replication or fall through to the later code.
5714 //
5715 // - otherwise, if the only defined element is an i32 or smaller value,
5716 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
5717 // This is only a win if the single defined element is used more than once.
5718 // In other cases we're better off using a single VLVGx.
5719 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
5720 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
5721
5722 // If all elements are loads, use VLREP/VLEs (below).
5723 bool AllLoads = true;
5724 for (auto Elem : Elems)
5725 if (!isVectorElementLoad(Elem)) {
5726 AllLoads = false;
5727 break;
5728 }
5729
5730 // The best way of building a v2i64 from two i64s is to use VLVGP.
5731 if (VT == MVT::v2i64 && !AllLoads)
5732 return joinDwords(DAG, DL, Elems[0], Elems[1]);
5733
5734 // Use a 64-bit merge high to combine two doubles.
5735 if (VT == MVT::v2f64 && !AllLoads)
5736 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5737
5738 // Build v4f32 values directly from the FPRs:
5739 //
5740 // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
5741 // V V VMRHF
5742 // <ABxx> <CDxx>
5743 // V VMRHG
5744 // <ABCD>
5745 if (VT == MVT::v4f32 && !AllLoads) {
5746 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5747 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
5748 // Avoid unnecessary undefs by reusing the other operand.
5749 if (Op01.isUndef())
5750 Op01 = Op23;
5751 else if (Op23.isUndef())
5752 Op23 = Op01;
5753 // Merging identical replications is a no-op.
5754 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
5755 return Op01;
5756 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
5757 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
5758 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
5759 DL, MVT::v2i64, Op01, Op23);
5760 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5761 }
5762
5763 // Collect the constant terms.
5764 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
5765 SmallVector<bool, 16> Done(NumElements, false);
5766
5767 unsigned NumConstants = 0;
5768 for (unsigned I = 0; I < NumElements; ++I) {
5769 SDValue Elem = Elems[I];
5770 if (Elem.getOpcode() == ISD::Constant ||
5771 Elem.getOpcode() == ISD::ConstantFP) {
5772 NumConstants += 1;
5773 Constants[I] = Elem;
5774 Done[I] = true;
5775 }
5776 }
5777 // If there was at least one constant, fill in the other elements of
5778 // Constants with undefs to get a full vector constant and use that
5779 // as the starting point.
5780 SDValue Result;
5781 SDValue ReplicatedVal;
5782 if (NumConstants > 0) {
5783 for (unsigned I = 0; I < NumElements; ++I)
5784 if (!Constants[I].getNode())
5785 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
5786 Result = DAG.getBuildVector(VT, DL, Constants);
5787 } else {
5788 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
5789 // avoid a false dependency on any previous contents of the vector
5790 // register.
5791
5792 // Use a VLREP if at least one element is a load. Make sure to replicate
5793 // the load with the most elements having its value.
5794 std::map<const SDNode*, unsigned> UseCounts;
5795 SDNode *LoadMaxUses = nullptr;
5796 for (unsigned I = 0; I < NumElements; ++I)
5797 if (isVectorElementLoad(Elems[I])) {
5798 SDNode *Ld = Elems[I].getNode();
5799 UseCounts[Ld]++;
5800 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
5801 LoadMaxUses = Ld;
5802 }
5803 if (LoadMaxUses != nullptr) {
5804 ReplicatedVal = SDValue(LoadMaxUses, 0);
5805 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
5806 } else {
5807 // Try to use VLVGP.
5808 unsigned I1 = NumElements / 2 - 1;
5809 unsigned I2 = NumElements - 1;
5810 bool Def1 = !Elems[I1].isUndef();
5811 bool Def2 = !Elems[I2].isUndef();
5812 if (Def1 || Def2) {
5813 SDValue Elem1 = Elems[Def1 ? I1 : I2];
5814 SDValue Elem2 = Elems[Def2 ? I2 : I1];
5815 Result = DAG.getNode(ISD::BITCAST, DL, VT,
5816 joinDwords(DAG, DL, Elem1, Elem2));
5817 Done[I1] = true;
5818 Done[I2] = true;
5819 } else
5820 Result = DAG.getUNDEF(VT);
5821 }
5822 }
5823
5824 // Use VLVGx to insert the other elements.
5825 for (unsigned I = 0; I < NumElements; ++I)
5826 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
5827 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
5828 DAG.getConstant(I, DL, MVT::i32));
5829 return Result;
5830}
5831
5832SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
5833 SelectionDAG &DAG) const {
5834 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
5835 SDLoc DL(Op);
5836 EVT VT = Op.getValueType();
5837
5838 if (BVN->isConstant()) {
5839 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
5840 return Op;
5841
5842 // Fall back to loading it from memory.
5843 return SDValue();
5844 }
5845
5846 // See if we should use shuffles to construct the vector from other vectors.
5847 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
5848 return Res;
5849
5850 // Detect SCALAR_TO_VECTOR conversions.
5851 if (isScalarToVector(Op))
5852 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
5853
5854 // Otherwise use buildVector to build the vector up from GPRs.
5855 unsigned NumElements = Op.getNumOperands();
5856 SmallVector<SDValue, 16> Ops(NumElements);
5857 for (unsigned I = 0; I < NumElements; ++I)
5858 Ops[I] = Op.getOperand(I);
5859 return buildVector(DAG, DL, VT, Ops);
5860}
5861
5862SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
5863 SelectionDAG &DAG) const {
5864 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
5865 SDLoc DL(Op);
5866 EVT VT = Op.getValueType();
5867 unsigned NumElements = VT.getVectorNumElements();
5868
5869 if (VSN->isSplat()) {
5870 SDValue Op0 = Op.getOperand(0);
5871 unsigned Index = VSN->getSplatIndex();
5873 "Splat index should be defined and in first operand");
5874 // See whether the value we're splatting is directly available as a scalar.
5875 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5876 Op0.getOpcode() == ISD::BUILD_VECTOR)
5877 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
5878 // Otherwise keep it as a vector-to-vector operation.
5879 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
5880 DAG.getTargetConstant(Index, DL, MVT::i32));
5881 }
5882
5883 GeneralShuffle GS(VT);
5884 for (unsigned I = 0; I < NumElements; ++I) {
5885 int Elt = VSN->getMaskElt(I);
5886 if (Elt < 0)
5887 GS.addUndef();
5888 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
5889 unsigned(Elt) % NumElements))
5890 return SDValue();
5891 }
5892 return GS.getNode(DAG, SDLoc(VSN));
5893}
5894
5895SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
5896 SelectionDAG &DAG) const {
5897 SDLoc DL(Op);
5898 // Just insert the scalar into element 0 of an undefined vector.
5899 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
5900 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
5901 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
5902}
5903
5904SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
5905 SelectionDAG &DAG) const {
5906 // Handle insertions of floating-point values.
5907 SDLoc DL(Op);
5908 SDValue Op0 = Op.getOperand(0);
5909 SDValue Op1 = Op.getOperand(1);
5910 SDValue Op2 = Op.getOperand(2);
5911 EVT VT = Op.getValueType();
5912
5913 // Insertions into constant indices of a v2f64 can be done using VPDI.
5914 // However, if the inserted value is a bitcast or a constant then it's
5915 // better to use GPRs, as below.
5916 if (VT == MVT::v2f64 &&
5917 Op1.getOpcode() != ISD::BITCAST &&
5918 Op1.getOpcode() != ISD::ConstantFP &&
5919 Op2.getOpcode() == ISD::Constant) {
5920 uint64_t Index = Op2->getAsZExtVal();
5921 unsigned Mask = VT.getVectorNumElements() - 1;
5922 if (Index <= Mask)
5923 return Op;
5924 }
5925
5926 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
5927 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
5928 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
5929 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
5930 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
5931 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
5932 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5933}
5934
5935SDValue
5936SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
5937 SelectionDAG &DAG) const {
5938 // Handle extractions of floating-point values.
5939 SDLoc DL(Op);
5940 SDValue Op0 = Op.getOperand(0);
5941 SDValue Op1 = Op.getOperand(1);
5942 EVT VT = Op.getValueType();
5943 EVT VecVT = Op0.getValueType();
5944
5945 // Extractions of constant indices can be done directly.
5946 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
5947 uint64_t Index = CIndexN->getZExtValue();
5948 unsigned Mask = VecVT.getVectorNumElements() - 1;
5949 if (Index <= Mask)
5950 return Op;
5951 }
5952
5953 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
5954 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
5955 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
5956 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
5957 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
5958 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5959}
5960
5961SDValue SystemZTargetLowering::
5962lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5963 SDValue PackedOp = Op.getOperand(0);
5964 EVT OutVT = Op.getValueType();
5965 EVT InVT = PackedOp.getValueType();
5966 unsigned ToBits = OutVT.getScalarSizeInBits();
5967 unsigned FromBits = InVT.getScalarSizeInBits();
5968 do {
5969 FromBits *= 2;
5970 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
5971 SystemZ::VectorBits / FromBits);
5972 PackedOp =
5973 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
5974 } while (FromBits != ToBits);
5975 return PackedOp;
5976}
5977
5978// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
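// For example, zero-extending v8i16 into v4i32 uses the shuffle mask
// { 8, 0, 9, 1, 10, 2, 11, 3 }, where indices 8..15 select elements of the
// zero vector.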
5979SDValue SystemZTargetLowering::
5980lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5981 SDValue PackedOp = Op.getOperand(0);
5982 SDLoc DL(Op);
5983 EVT OutVT = Op.getValueType();
5984 EVT InVT = PackedOp.getValueType();
5985 unsigned InNumElts = InVT.getVectorNumElements();
5986 unsigned OutNumElts = OutVT.getVectorNumElements();
5987 unsigned NumInPerOut = InNumElts / OutNumElts;
5988
5989 SDValue ZeroVec =
5990 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
5991
5992 SmallVector<int, 16> Mask(InNumElts);
5993 unsigned ZeroVecElt = InNumElts;
5994 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
5995 unsigned MaskElt = PackedElt * NumInPerOut;
5996 unsigned End = MaskElt + NumInPerOut - 1;
5997 for (; MaskElt < End; MaskElt++)
5998 Mask[MaskElt] = ZeroVecElt++;
5999 Mask[MaskElt] = PackedElt;
6000 }
6001 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6002 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6003}
6004
6005SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6006 unsigned ByScalar) const {
6007 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6008 SDValue Op0 = Op.getOperand(0);
6009 SDValue Op1 = Op.getOperand(1);
6010 SDLoc DL(Op);
6011 EVT VT = Op.getValueType();
6012 unsigned ElemBitSize = VT.getScalarSizeInBits();
6013
6014 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6015 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6016 APInt SplatBits, SplatUndef;
6017 unsigned SplatBitSize;
6018 bool HasAnyUndefs;
6019 // Check for constant splats. Use ElemBitSize as the minimum element
6020 // width and reject splats that need wider elements.
6021 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6022 ElemBitSize, true) &&
6023 SplatBitSize == ElemBitSize) {
6024 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6025 DL, MVT::i32);
6026 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6027 }
6028 // Check for variable splats.
6029 BitVector UndefElements;
6030 SDValue Splat = BVN->getSplatValue(&UndefElements);
6031 if (Splat) {
6032 // Since i32 is the smallest legal type, we either need a no-op
6033 // or a truncation.
6034 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6035 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6036 }
6037 }
6038
6039 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6040 // and the shift amount is directly available in a GPR.
6041 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6042 if (VSN->isSplat()) {
6043 SDValue VSNOp0 = VSN->getOperand(0);
6044 unsigned Index = VSN->getSplatIndex();
6046 "Splat index should be defined and in first operand");
6047 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6048 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6049 // Since i32 is the smallest legal type, we either need a no-op
6050 // or a truncation.
6051 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6052 VSNOp0.getOperand(Index));
6053 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6054 }
6055 }
6056 }
6057
6058 // Otherwise just treat the current form as legal.
6059 return Op;
6060}
6061
6062SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
6063 SelectionDAG &DAG) const {
6064 SDLoc DL(Op);
6065 MVT ResultVT = Op.getSimpleValueType();
6066 SDValue Arg = Op.getOperand(0);
6067 unsigned Check = Op.getConstantOperandVal(1);
6068
6069 unsigned TDCMask = 0;
6070 if (Check & fcSNan)
6071 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
6072 if (Check & fcQNan)
6073 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
6074 if (Check & fcPosInf)
6075 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
6076 if (Check & fcNegInf)
6077 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
6078 if (Check & fcPosNormal)
6079 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
6080 if (Check & fcNegNormal)
6081 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
6082 if (Check & fcPosSubnormal)
6083 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
6084 if (Check & fcNegSubnormal)
6085 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
6086 if (Check & fcPosZero)
6087 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
6088 if (Check & fcNegZero)
6089 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
6090 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
6091
6092 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
6093 return getCCResult(DAG, Intr);
6094}
6095
6096SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
6097 SelectionDAG &DAG) const {
6098 SDLoc DL(Op);
6099 SDValue Chain = Op.getOperand(0);
6100
6101 // STCKF only supports a memory operand, so we have to use a temporary.
6102 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
6103 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6104 MachinePointerInfo MPI =
6105 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
6106
6107 // Use STCKF to store the TOD clock into the temporary.
6108 SDValue StoreOps[] = {Chain, StackPtr};
6109 Chain = DAG.getMemIntrinsicNode(
6110 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
6111 MPI, MaybeAlign(), MachineMemOperand::MOStore);
6112
6113 // And read it back from there.
6114 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
6115}
6116
6117SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
6118 SelectionDAG &DAG) const {
6119 switch (Op.getOpcode()) {
6120 case ISD::FRAMEADDR:
6121 return lowerFRAMEADDR(Op, DAG);
6122 case ISD::RETURNADDR:
6123 return lowerRETURNADDR(Op, DAG);
6124 case ISD::BR_CC:
6125 return lowerBR_CC(Op, DAG);
6126 case ISD::SELECT_CC:
6127 return lowerSELECT_CC(Op, DAG);
6128 case ISD::SETCC:
6129 return lowerSETCC(Op, DAG);
6130 case ISD::STRICT_FSETCC:
6131 return lowerSTRICT_FSETCC(Op, DAG, false);
6132 case ISD::STRICT_FSETCCS:
6133 return lowerSTRICT_FSETCC(Op, DAG, true);
6134 case ISD::GlobalAddress:
6135 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
6136 case ISD::GlobalTLSAddress:
6137 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
6138 case ISD::BlockAddress:
6139 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
6140 case ISD::JumpTable:
6141 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
6142 case ISD::ConstantPool:
6143 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
6144 case ISD::BITCAST:
6145 return lowerBITCAST(Op, DAG);
6146 case ISD::VASTART:
6147 return lowerVASTART(Op, DAG);
6148 case ISD::VACOPY:
6149 return lowerVACOPY(Op, DAG);
6150 case ISD::DYNAMIC_STACKALLOC:
6151 return lowerDYNAMIC_STACKALLOC(Op, DAG);
6152 case ISD::GET_DYNAMIC_AREA_OFFSET:
6153 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
6154 case ISD::SMUL_LOHI:
6155 return lowerSMUL_LOHI(Op, DAG);
6156 case ISD::UMUL_LOHI:
6157 return lowerUMUL_LOHI(Op, DAG);
6158 case ISD::SDIVREM:
6159 return lowerSDIVREM(Op, DAG);
6160 case ISD::UDIVREM:
6161 return lowerUDIVREM(Op, DAG);
6162 case ISD::SADDO:
6163 case ISD::SSUBO:
6164 case ISD::UADDO:
6165 case ISD::USUBO:
6166 return lowerXALUO(Op, DAG);
6167 case ISD::UADDO_CARRY:
6168 case ISD::USUBO_CARRY:
6169 return lowerUADDSUBO_CARRY(Op, DAG);
6170 case ISD::OR:
6171 return lowerOR(Op, DAG);
6172 case ISD::CTPOP:
6173 return lowerCTPOP(Op, DAG);
6174 case ISD::VECREDUCE_ADD:
6175 return lowerVECREDUCE_ADD(Op, DAG);
6176 case ISD::ATOMIC_FENCE:
6177 return lowerATOMIC_FENCE(Op, DAG);
6178 case ISD::ATOMIC_SWAP:
6179 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
6180 case ISD::ATOMIC_STORE:
6181 case ISD::ATOMIC_LOAD:
6182 return lowerATOMIC_LDST_I128(Op, DAG);
6183 case ISD::ATOMIC_LOAD_ADD:
6184 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
6185 case ISD::ATOMIC_LOAD_SUB:
6186 return lowerATOMIC_LOAD_SUB(Op, DAG);
6187 case ISD::ATOMIC_LOAD_AND:
6188 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
6189 case ISD::ATOMIC_LOAD_OR:
6190 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
6191 case ISD::ATOMIC_LOAD_XOR:
6192 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
6193 case ISD::ATOMIC_LOAD_NAND:
6194 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
6195 case ISD::ATOMIC_LOAD_MIN:
6196 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
6197 case ISD::ATOMIC_LOAD_MAX:
6198 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
6199 case ISD::ATOMIC_LOAD_UMIN:
6200 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
6201 case ISD::ATOMIC_LOAD_UMAX:
6202 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
6203 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
6204 return lowerATOMIC_CMP_SWAP(Op, DAG);
6205 case ISD::STACKSAVE:
6206 return lowerSTACKSAVE(Op, DAG);
6207 case ISD::STACKRESTORE:
6208 return lowerSTACKRESTORE(Op, DAG);
6209 case ISD::PREFETCH:
6210 return lowerPREFETCH(Op, DAG);
6211 case ISD::INTRINSIC_W_CHAIN:
6212 return lowerINTRINSIC_W_CHAIN(Op, DAG);
6213 case ISD::INTRINSIC_WO_CHAIN:
6214 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
6215 case ISD::BUILD_VECTOR:
6216 return lowerBUILD_VECTOR(Op, DAG);
6217 case ISD::VECTOR_SHUFFLE:
6218 return lowerVECTOR_SHUFFLE(Op, DAG);
6219 case ISD::SCALAR_TO_VECTOR:
6220 return lowerSCALAR_TO_VECTOR(Op, DAG);
6221 case ISD::INSERT_VECTOR_ELT:
6222 return lowerINSERT_VECTOR_ELT(Op, DAG);
6223 case ISD::EXTRACT_VECTOR_ELT:
6224 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6225 case ISD::SIGN_EXTEND_VECTOR_INREG:
6226 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
6227 case ISD::ZERO_EXTEND_VECTOR_INREG:
6228 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
6229 case ISD::SHL:
6230 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
6231 case ISD::SRL:
6232 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
6233 case ISD::SRA:
6234 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
6235 case ISD::ROTL:
6236 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
6237 case ISD::IS_FPCLASS:
6238 return lowerIS_FPCLASS(Op, DAG);
6239 case ISD::GET_ROUNDING:
6240 return lowerGET_ROUNDING(Op, DAG);
6241 case ISD::READCYCLECOUNTER:
6242 return lowerREADCYCLECOUNTER(Op, DAG);
6243 default:
6244 llvm_unreachable("Unexpected node to lower");
6245 }
6246}
6247
6248static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
6249 const SDLoc &SL) {
6250 // If i128 is legal, just use a normal bitcast.
6251 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
6252 return DAG.getBitcast(MVT::f128, Src);
6253
6254 // Otherwise, f128 must live in FP128, so do a partwise move.
6255 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
6256 &SystemZ::FP128BitRegClass);
6257
6258 SDValue Hi, Lo;
6259 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
6260
6261 Hi = DAG.getBitcast(MVT::f64, Hi);
6262 Lo = DAG.getBitcast(MVT::f64, Lo);
6263
6264 SDNode *Pair = DAG.getMachineNode(
6265 SystemZ::REG_SEQUENCE, SL, MVT::f128,
6266 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
6267 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
6268 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
6269 return SDValue(Pair, 0);
6270}
6271
6272static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
6273 const SDLoc &SL) {
6274 // If i128 is legal, just use a normal bitcast.
6275 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
6276 return DAG.getBitcast(MVT::i128, Src);
6277
6278 // Otherwise, f128 must live in FP128, so do a partwise move.
6279 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
6280 &SystemZ::FP128BitRegClass);
6281
6282 SDValue LoFP =
6283 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
6284 SDValue HiFP =
6285 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
6286 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
6287 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
6288
6289 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
6290}
6291
6292// Lower operations with invalid operand or result types (currently used
6293// only for 128-bit integer types).
6294void
6295SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
6296 SmallVectorImpl<SDValue> &Results,
6297 SelectionDAG &DAG) const {
6298 switch (N->getOpcode()) {
6299 case ISD::ATOMIC_LOAD: {
6300 SDLoc DL(N);
6301 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
6302 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
6303 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6304 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
6305 DL, Tys, Ops, MVT::i128, MMO);
6306
6307 SDValue Lowered = lowerGR128ToI128(DAG, Res);
6308 if (N->getValueType(0) == MVT::f128)
6309 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
6310 Results.push_back(Lowered);
6311 Results.push_back(Res.getValue(1));
6312 break;
6313 }
6314 case ISD::ATOMIC_STORE: {
6315 SDLoc DL(N);
6316 SDVTList Tys = DAG.getVTList(MVT::Other);
6317 SDValue Val = N->getOperand(1);
6318 if (Val.getValueType() == MVT::f128)
6319 Val = expandBitCastF128ToI128(DAG, Val, DL);
6320 Val = lowerI128ToGR128(DAG, Val);
6321
6322 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
6323 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6324 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
6325 DL, Tys, Ops, MVT::i128, MMO);
6326 // We have to enforce sequential consistency by performing a
6327 // serialization operation after the store.
6328 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
6329 AtomicOrdering::SequentiallyConsistent)
6330 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
6331 MVT::Other, Res), 0);
6332 Results.push_back(Res);
6333 break;
6334 }
6335 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
6336 SDLoc DL(N);
6337 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
6338 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
6339 lowerI128ToGR128(DAG, N->getOperand(2)),
6340 lowerI128ToGR128(DAG, N->getOperand(3)) };
6341 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6342 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
6343 DL, Tys, Ops, MVT::i128, MMO);
6344 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
6345 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
6346 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
6347 Results.push_back(lowerGR128ToI128(DAG, Res));
6348 Results.push_back(Success);
6349 Results.push_back(Res.getValue(2));
6350 break;
6351 }
6352 case ISD::BITCAST: {
6353 SDValue Src = N->getOperand(0);
6354 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
6355 !useSoftFloat()) {
6356 SDLoc DL(N);
6357 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
6358 }
6359 break;
6360 }
6361 default:
6362 llvm_unreachable("Unexpected node to lower");
6363 }
6364}
6365
6366void
6367SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
6368 SmallVectorImpl<SDValue> &Results,
6369 SelectionDAG &DAG) const {
6370 return LowerOperationWrapper(N, Results, DAG);
6371}
6372
6373const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
6374#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
6375 switch ((SystemZISD::NodeType)Opcode) {
6376 case SystemZISD::FIRST_NUMBER: break;
6377 OPCODE(RET_GLUE);
6378 OPCODE(CALL);
6379 OPCODE(SIBCALL);
6380 OPCODE(TLS_GDCALL);
6381 OPCODE(TLS_LDCALL);
6382 OPCODE(PCREL_WRAPPER);
6383 OPCODE(PCREL_OFFSET);
6384 OPCODE(ICMP);
6385 OPCODE(FCMP);
6386 OPCODE(STRICT_FCMP);
6387 OPCODE(STRICT_FCMPS);
6388 OPCODE(TM);
6389 OPCODE(BR_CCMASK);
6390 OPCODE(SELECT_CCMASK);
6391 OPCODE(ADJDYNALLOC);
6392 OPCODE(PROBED_ALLOCA);
6393 OPCODE(POPCNT);
6394 OPCODE(SMUL_LOHI);
6395 OPCODE(UMUL_LOHI);
6396 OPCODE(SDIVREM);
6397 OPCODE(UDIVREM);
6398 OPCODE(SADDO);
6399 OPCODE(SSUBO);
6400 OPCODE(UADDO);
6401 OPCODE(USUBO);
6402 OPCODE(ADDCARRY);
6403 OPCODE(SUBCARRY);
6404 OPCODE(GET_CCMASK);
6405 OPCODE(MVC);
6406 OPCODE(NC);
6407 OPCODE(OC);
6408 OPCODE(XC);
6409 OPCODE(CLC);
6410 OPCODE(MEMSET_MVC);
6411 OPCODE(STPCPY);
6412 OPCODE(STRCMP);
6413 OPCODE(SEARCH_STRING);
6414 OPCODE(IPM);
6415 OPCODE(TBEGIN);
6416 OPCODE(TBEGIN_NOFLOAT);
6417 OPCODE(TEND);
6418 OPCODE(BYTE_MASK);
6419 OPCODE(ROTATE_MASK);
6420 OPCODE(REPLICATE);
6421 OPCODE(JOIN_DWORDS);
6422 OPCODE(SPLAT);
6423 OPCODE(MERGE_HIGH);
6424 OPCODE(MERGE_LOW);
6425 OPCODE(SHL_DOUBLE);
6426 OPCODE(PERMUTE_DWORDS);
6427 OPCODE(PERMUTE);
6428 OPCODE(PACK);
6429 OPCODE(PACKS_CC);
6430 OPCODE(PACKLS_CC);
6431 OPCODE(UNPACK_HIGH);
6432 OPCODE(UNPACKL_HIGH);
6433 OPCODE(UNPACK_LOW);
6434 OPCODE(UNPACKL_LOW);
6435 OPCODE(VSHL_BY_SCALAR);
6436 OPCODE(VSRL_BY_SCALAR);
6437 OPCODE(VSRA_BY_SCALAR);
6438 OPCODE(VROTL_BY_SCALAR);
6439 OPCODE(VSUM);
6440 OPCODE(VACC);
6441 OPCODE(VSCBI);
6442 OPCODE(VAC);
6443 OPCODE(VSBI);
6444 OPCODE(VACCC);
6445 OPCODE(VSBCBI);
6446 OPCODE(VICMPE);
6447 OPCODE(VICMPH);
6448 OPCODE(VICMPHL);
6449 OPCODE(VICMPES);
6450 OPCODE(VICMPHS);
6451 OPCODE(VICMPHLS);
6452 OPCODE(VFCMPE);
6453 OPCODE(STRICT_VFCMPE);
6454 OPCODE(STRICT_VFCMPES);
6455 OPCODE(VFCMPH);
6456 OPCODE(STRICT_VFCMPH);
6457 OPCODE(STRICT_VFCMPHS);
6458 OPCODE(VFCMPHE);
6459 OPCODE(STRICT_VFCMPHE);
6460 OPCODE(STRICT_VFCMPHES);
6461 OPCODE(VFCMPES);
6462 OPCODE(VFCMPHS);
6463 OPCODE(VFCMPHES);
6464 OPCODE(VFTCI);
6465 OPCODE(VEXTEND);
6466 OPCODE(STRICT_VEXTEND);
6467 OPCODE(VROUND);
6468 OPCODE(STRICT_VROUND);
6469 OPCODE(VTM);
6470 OPCODE(SCMP128HI);
6471 OPCODE(UCMP128HI);
6472 OPCODE(VFAE_CC);
6473 OPCODE(VFAEZ_CC);
6474 OPCODE(VFEE_CC);
6475 OPCODE(VFEEZ_CC);
6476 OPCODE(VFENE_CC);
6477 OPCODE(VFENEZ_CC);
6478 OPCODE(VISTR_CC);
6479 OPCODE(VSTRC_CC);
6480 OPCODE(VSTRCZ_CC);
6481 OPCODE(VSTRS_CC);
6482 OPCODE(VSTRSZ_CC);
6483 OPCODE(TDC);
6484 OPCODE(ATOMIC_SWAPW);
6485 OPCODE(ATOMIC_LOADW_ADD);
6486 OPCODE(ATOMIC_LOADW_SUB);
6487 OPCODE(ATOMIC_LOADW_AND);
6488 OPCODE(ATOMIC_LOADW_OR);
6489 OPCODE(ATOMIC_LOADW_XOR);
6490 OPCODE(ATOMIC_LOADW_NAND);
6491 OPCODE(ATOMIC_LOADW_MIN);
6492 OPCODE(ATOMIC_LOADW_MAX);
6493 OPCODE(ATOMIC_LOADW_UMIN);
6494 OPCODE(ATOMIC_LOADW_UMAX);
6495 OPCODE(ATOMIC_CMP_SWAPW);
6496 OPCODE(ATOMIC_CMP_SWAP);
6497 OPCODE(ATOMIC_LOAD_128);
6498 OPCODE(ATOMIC_STORE_128);
6499 OPCODE(ATOMIC_CMP_SWAP_128);
6500 OPCODE(LRV);
6501 OPCODE(STRV);
6502 OPCODE(VLER);
6503 OPCODE(VSTER);
6504 OPCODE(STCKF);
6505 OPCODE(PREFETCH);
6506 OPCODE(ADA_ENTRY);
6507 }
6508 return nullptr;
6509#undef OPCODE
6510}
6511
6512// Return true if VT is a vector whose elements are a whole number of bytes
6513// in width. Also check for presence of vector support.
6514bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
6515 if (!Subtarget.hasVector())
6516 return false;
6517
6518 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
6519}
6520
6521// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
6522// producing a result of type ResVT. Op is a possibly bitcast version
6523// of the input vector and Index is the index (based on type VecVT) that
6524// should be extracted. Return the new extraction if a simplification
6525// was possible or if Force is true.
6526SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
6527 EVT VecVT, SDValue Op,
6528 unsigned Index,
6529 DAGCombinerInfo &DCI,
6530 bool Force) const {
6531 SelectionDAG &DAG = DCI.DAG;
6532
6533 // The number of bytes being extracted.
6534 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6535
6536 for (;;) {
6537 unsigned Opcode = Op.getOpcode();
6538 if (Opcode == ISD::BITCAST)
6539 // Look through bitcasts.
6540 Op = Op.getOperand(0);
6541 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
6542 canTreatAsByteVector(Op.getValueType())) {
6543 // Get a VPERM-like permute mask and see whether the bytes covered
6544 // by the extracted element are a contiguous sequence from one
6545 // source operand.
6546 SmallVector<int, SystemZ::VectorBytes> Bytes;
6547 if (!getVPermMask(Op, Bytes))
6548 break;
6549 int First;
6550 if (!getShuffleInput(Bytes, Index * BytesPerElement,
6551 BytesPerElement, First))
6552 break;
6553 if (First < 0)
6554 return DAG.getUNDEF(ResVT);
6555 // Make sure the contiguous sequence starts at a multiple of the
6556 // original element size.
6557 unsigned Byte = unsigned(First) % Bytes.size();
6558 if (Byte % BytesPerElement != 0)
6559 break;
6560 // We can get the extracted value directly from an input.
6561 Index = Byte / BytesPerElement;
6562 Op = Op.getOperand(unsigned(First) / Bytes.size());
6563 Force = true;
6564 } else if (Opcode == ISD::BUILD_VECTOR &&
6565 canTreatAsByteVector(Op.getValueType())) {
6566 // We can only optimize this case if the BUILD_VECTOR elements are
6567 // at least as wide as the extracted value.
6568 EVT OpVT = Op.getValueType();
6569 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6570 if (OpBytesPerElement < BytesPerElement)
6571 break;
6572 // Make sure that the least-significant bit of the extracted value
6573 // is the least significant bit of an input.
6574 unsigned End = (Index + 1) * BytesPerElement;
6575 if (End % OpBytesPerElement != 0)
6576 break;
6577 // We're extracting the low part of one operand of the BUILD_VECTOR.
6578 Op = Op.getOperand(End / OpBytesPerElement - 1);
6579 if (!Op.getValueType().isInteger()) {
6580 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
6581 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
6582 DCI.AddToWorklist(Op.getNode());
6583 }
6584 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
6585 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
6586 if (VT != ResVT) {
6587 DCI.AddToWorklist(Op.getNode());
6588 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
6589 }
6590 return Op;
6591 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6592 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
6593 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6594 canTreatAsByteVector(Op.getValueType()) &&
6595 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
6596 // Make sure that only the unextended bits are significant.
6597 EVT ExtVT = Op.getValueType();
6598 EVT OpVT = Op.getOperand(0).getValueType();
6599 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
6600 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6601 unsigned Byte = Index * BytesPerElement;
6602 unsigned SubByte = Byte % ExtBytesPerElement;
6603 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
6604 if (SubByte < MinSubByte ||
6605 SubByte + BytesPerElement > ExtBytesPerElement)
6606 break;
6607 // Get the byte offset of the unextended element
6608 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
6609 // ...then add the byte offset relative to that element.
6610 Byte += SubByte - MinSubByte;
6611 if (Byte % BytesPerElement != 0)
6612 break;
6613 Op = Op.getOperand(0);
6614 Index = Byte / BytesPerElement;
6615 Force = true;
6616 } else
6617 break;
6618 }
6619 if (Force) {
6620 if (Op.getValueType() != VecVT) {
6621 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
6622 DCI.AddToWorklist(Op.getNode());
6623 }
6624 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
6625 DAG.getConstant(Index, DL, MVT::i32));
6626 }
6627 return SDValue();
6628}
6629
6630// Optimize vector operations in scalar value Op on the basis that Op
6631// is truncated to TruncVT.
6632SDValue SystemZTargetLowering::combineTruncateExtract(
6633 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
6634 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
6635 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
6636 // of type TruncVT.
6637 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6638 TruncVT.getSizeInBits() % 8 == 0) {
6639 SDValue Vec = Op.getOperand(0);
6640 EVT VecVT = Vec.getValueType();
6641 if (canTreatAsByteVector(VecVT)) {
6642 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
6643 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6644 unsigned TruncBytes = TruncVT.getStoreSize();
6645 if (BytesPerElement % TruncBytes == 0) {
6646 // Calculate the value of Y' in the above description. We are
6647 // splitting the original elements into Scale equal-sized pieces
6648 // and for truncation purposes want the last (least-significant)
6649 // of these pieces for IndexN. This is easiest to do by calculating
6650 // the start index of the following element and then subtracting 1.
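 // For example, truncating an i32 taken from element 1 of a v4i32
 // to i8 gives Scale = 4 and NewIndex = (1 + 1) * 4 - 1 = 7, the
 // last byte of that element.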
6651 unsigned Scale = BytesPerElement / TruncBytes;
6652 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
6653
6654 // Defer the creation of the bitcast from X to combineExtract,
6655 // which might be able to optimize the extraction.
6656 VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
6657 VecVT.getStoreSize() / TruncBytes);
6658 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
6659 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
6660 }
6661 }
6662 }
6663 }
6664 return SDValue();
6665}
6666
6667SDValue SystemZTargetLowering::combineZERO_EXTEND(
6668 SDNode *N, DAGCombinerInfo &DCI) const {
6669 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
6670 SelectionDAG &DAG = DCI.DAG;
6671 SDValue N0 = N->getOperand(0);
6672 EVT VT = N->getValueType(0);
6673 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
6674 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
6675 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6676 if (TrueOp && FalseOp) {
6677 SDLoc DL(N0);
6678 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
6679 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
6680 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
6681 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
6682 // If N0 has multiple uses, change other uses as well.
6683 if (!N0.hasOneUse()) {
6684 SDValue TruncSelect =
6685 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
6686 DCI.CombineTo(N0.getNode(), TruncSelect);
6687 }
6688 return NewSelect;
6689 }
6690 }
6691 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
6692 // of the result is smaller than the size of X and all the truncated bits
6693 // of X are already zero.
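 // For example, if X is an i128 whose bits 32..63 are known to be zero,
 // (zext i64 (xor (trunc i32 X), C)) becomes (xor (trunc i64 X), C).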
6694 if (N0.getOpcode() == ISD::XOR &&
6695 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
6696 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6697 N0.getOperand(1).getOpcode() == ISD::Constant) {
6698 SDValue X = N0.getOperand(0).getOperand(0);
6699 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
6700 KnownBits Known = DAG.computeKnownBits(X);
6701 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
6702 N0.getValueSizeInBits(),
6703 VT.getSizeInBits());
6704 if (TruncatedBits.isSubsetOf(Known.Zero)) {
6705 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
6706 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
6707 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
6708 X, DAG.getConstant(Mask, SDLoc(N0), VT));
6709 }
6710 }
6711 }
6712
6713 return SDValue();
6714}
6715
6716SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
6717 SDNode *N, DAGCombinerInfo &DCI) const {
6718 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
6719 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
6720 // into (select_cc LHS, RHS, -1, 0, COND)
6721 SelectionDAG &DAG = DCI.DAG;
6722 SDValue N0 = N->getOperand(0);
6723 EVT VT = N->getValueType(0);
6724 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
6725 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
6726 N0 = N0.getOperand(0);
6727 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
6728 SDLoc DL(N0);
6729 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
6730 DAG.getAllOnesConstant(DL, VT),
6731 DAG.getConstant(0, DL, VT), N0.getOperand(2) };
6732 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
6733 }
6734 return SDValue();
6735}
6736
6737SDValue SystemZTargetLowering::combineSIGN_EXTEND(
6738 SDNode *N, DAGCombinerInfo &DCI) const {
6739 // Convert (sext (ashr (shl X, C1), C2)) to
6740 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
6741 // cheap as narrower ones.
6742 SelectionDAG &DAG = DCI.DAG;
6743 SDValue N0 = N->getOperand(0);
6744 EVT VT = N->getValueType(0);
6745 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
6746 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6747 SDValue Inner = N0.getOperand(0);
6748 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
6749 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
6750 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
6751 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
6752 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
6753 EVT ShiftVT = N0.getOperand(1).getValueType();
6754 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
6755 Inner.getOperand(0));
6756 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
6757 DAG.getConstant(NewShlAmt, SDLoc(Inner),
6758 ShiftVT));
6759 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
6760 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
6761 }
6762 }
6763 }
6764
6765 return SDValue();
6766}
6767
6768SDValue SystemZTargetLowering::combineMERGE(
6769 SDNode *N, DAGCombinerInfo &DCI) const {
6770 SelectionDAG &DAG = DCI.DAG;
6771 unsigned Opcode = N->getOpcode();
6772 SDValue Op0 = N->getOperand(0);
6773 SDValue Op1 = N->getOperand(1);
6774 if (Op0.getOpcode() == ISD::BITCAST)
6775 Op0 = Op0.getOperand(0);
6776 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
6777 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
6778 // for v4f32.
6779 if (Op1 == N->getOperand(0))
6780 return Op1;
6781 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
6782 EVT VT = Op1.getValueType();
6783 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
6784 if (ElemBytes <= 4) {
6785 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
6786 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
6787 EVT InVT = VT.changeVectorElementTypeToInteger();
6788 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
6789 SystemZ::VectorBytes / ElemBytes / 2);
6790 if (VT != InVT) {
6791 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
6792 DCI.AddToWorklist(Op1.getNode());
6793 }
6794 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
6795 DCI.AddToWorklist(Op.getNode());
6796 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
6797 }
6798 }
6799 return SDValue();
6800}
6801
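// Return true if the i128 value loaded by LD is used only to extract its
// two i64 halves: a TRUNCATE for the low part and an SRL by 64 feeding a
// TRUNCATE for the high part. The extracting nodes are returned in LoPart
// and HiPart; either may be null if that half is unused.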
6802static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
6803 SDNode *&HiPart) {
6804 LoPart = HiPart = nullptr;
6805
6806 // Scan through all users.
6807 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6808 UI != UIEnd; ++UI) {
6809 // Skip the uses of the chain.
6810 if (UI.getUse().getResNo() != 0)
6811 continue;
6812
6813 // Verify every user is a TRUNCATE to i64 of the low or high half.
6814 SDNode *User = *UI;
6815 bool IsLoPart = true;
6816 if (User->getOpcode() == ISD::SRL &&
6817 User->getOperand(1).getOpcode() == ISD::Constant &&
6818 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
6819 User = *User->use_begin();
6820 IsLoPart = false;
6821 }
6822 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
6823 return false;
6824
6825 if (IsLoPart) {
6826 if (LoPart)
6827 return false;
6828 LoPart = User;
6829 } else {
6830 if (HiPart)
6831 return false;
6832 HiPart = User;
6833 }
6834 }
6835 return true;
6836}
6837
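// Likewise for an f128 load LD whose only users extract the two 64-bit
// halves via EXTRACT_SUBREG of subreg_l64 (low part) and subreg_h64
// (high part).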
6838static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
6839 SDNode *&HiPart) {
6840 LoPart = HiPart = nullptr;
6841
6842 // Scan through all users.
6843 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6844 UI != UIEnd; ++UI) {
6845 // Skip the uses of the chain.
6846 if (UI.getUse().getResNo() != 0)
6847 continue;
6848
6849 // Verify every user is an EXTRACT_SUBREG of the low or high half.
6850 SDNode *User = *UI;
6851 if (!User->hasOneUse() || !User->isMachineOpcode() ||
6852 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
6853 return false;
6854
6855 switch (User->getConstantOperandVal(1)) {
6856 case SystemZ::subreg_l64:
6857 if (LoPart)
6858 return false;
6859 LoPart = User;
6860 break;
6861 case SystemZ::subreg_h64:
6862 if (HiPart)
6863 return false;
6864 HiPart = User;
6865 break;
6866 default:
6867 return false;
6868 }
6869 }
6870 return true;
6871}
6872
6873SDValue SystemZTargetLowering::combineLOAD(
6874 SDNode *N, DAGCombinerInfo &DCI) const {
6875 SelectionDAG &DAG = DCI.DAG;
6876 EVT LdVT = N->getValueType(0);
6877 SDLoc DL(N);
6878
6879 // Replace a 128-bit load that is used solely to move its value into GPRs
6880 // by separate loads of both halves.
6881 LoadSDNode *LD = cast<LoadSDNode>(N);
6882 if (LD->isSimple() && ISD::isNormalLoad(LD)) {
6883 SDNode *LoPart, *HiPart;
6884 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
6885 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
6886 // Rewrite each extraction as an independent load.
6887 SmallVector<SDValue, 2> ArgChains;
6888 if (HiPart) {
6889 SDValue EltLoad = DAG.getLoad(
6890 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
6891 LD->getPointerInfo(), LD->getOriginalAlign(),
6892 LD->getMemOperand()->getFlags(), LD->getAAInfo());
6893
6894 DCI.CombineTo(HiPart, EltLoad, true);
6895 ArgChains.push_back(EltLoad.getValue(1));
6896 }
6897 if (LoPart) {
6898 SDValue EltLoad = DAG.getLoad(
6899 LoPart->getValueType(0), DL, LD->getChain(),
6900 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
6901 LD->getPointerInfo().getWithOffset(8), LD->getOriginalAlign(),
6902 LD->getMemOperand()->getFlags(), LD->getAAInfo());
6903
6904 DCI.CombineTo(LoPart, EltLoad, true);
6905 ArgChains.push_back(EltLoad.getValue(1));
6906 }
6907
6908 // Collect all chains via TokenFactor.
6909 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
6910 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
6911 DCI.AddToWorklist(Chain.getNode());
6912 return SDValue(N, 0);
6913 }
6914 }
6915
6916 if (LdVT.isVector() || LdVT.isInteger())
6917 return SDValue();
6918 // Transform a scalar load that is REPLICATEd as well as having other
6919 // use(s) to the form where the other use(s) use the first element of the
6920 // REPLICATE instead of the load. Otherwise instruction selection will not
6921 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
6922 // point loads.
6923
6924 SDValue Replicate;
6925 SmallVector<SDNode*, 8> OtherUses;
6926 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6927 UI != UE; ++UI) {
6928 if (UI->getOpcode() == SystemZISD::REPLICATE) {
6929 if (Replicate)
6930 return SDValue(); // Should never happen
6931 Replicate = SDValue(*UI, 0);
6932 }
6933 else if (UI.getUse().getResNo() == 0)
6934 OtherUses.push_back(*UI);
6935 }
6936 if (!Replicate || OtherUses.empty())
6937 return SDValue();
6938
6939 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
6940 Replicate, DAG.getConstant(0, DL, MVT::i32));
6941 // Update uses of the loaded Value while preserving old chains.
6942 for (SDNode *U : OtherUses) {
6943 SmallVector<SDValue, 8> Ops;
6944 for (SDValue Op : U->ops())
6945 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
6946 DAG.UpdateNodeOperands(U, Ops);
6947 }
6948 return SDValue(N, 0);
6949}
6950
6951bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
6952 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
6953 return true;
6954 if (Subtarget.hasVectorEnhancements2())
6955 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
6956 return true;
6957 return false;
6958}
6959
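// Return true if the shuffle mask M reverses the order of the elements of
// the 128-bit vector type VT (undef indices are ignored), i.e. performs
// the element swap done by the VLER/VSTER instructions.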
6960static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
6961 if (!VT.isVector() || !VT.isSimple() ||
6962 VT.getSizeInBits() != 128 ||
6963 VT.getScalarSizeInBits() % 8 != 0)
6964 return false;
6965
6966 unsigned NumElts = VT.getVectorNumElements();
6967 for (unsigned i = 0; i < NumElts; ++i) {
6968 if (M[i] < 0) continue; // ignore UNDEF indices
6969 if ((unsigned) M[i] != NumElts - 1 - i)
6970 return false;
6971 }
6972
6973 return true;
6974}
6975
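// Return true if every use of StoredVal is a store whose scalar memory
// type is a power-of-two size of at most 16 bytes, or a splat BUILD_VECTOR
// that is itself only used by such stores.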
6976static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
6977 for (auto *U : StoredVal->uses()) {
6978 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
6979 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
6980 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
6981 continue;
6982 } else if (isa<BuildVectorSDNode>(U)) {
6983 SDValue BuildVector = SDValue(U, 0);
6984 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
6985 isOnlyUsedByStores(BuildVector, DAG))
6986 continue;
6987 }
6988 return false;
6989 }
6990 return true;
6991}
6992
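// Return true if Val is an i128 value assembled from two i64 parts as
// (or (zext Lo), (shl (anyext Hi), 64)), returning the parts in LoPart
// and HiPart.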
6993static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
6994 SDValue &HiPart) {
6995 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
6996 return false;
6997
6998 SDValue Op0 = Val.getOperand(0);
6999 SDValue Op1 = Val.getOperand(1);
7000
7001 if (Op0.getOpcode() == ISD::SHL)
7002 std::swap(Op0, Op1);
7003 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
7004 Op1.getOperand(1).getOpcode() != ISD::Constant ||
7005 Op1.getConstantOperandVal(1) != 64)
7006 return false;
7007 Op1 = Op1.getOperand(0);
7008
7009 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
7010 Op0.getOperand(0).getValueType() != MVT::i64)
7011 return false;
7012 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
7013 Op1.getOperand(0).getValueType() != MVT::i64)
7014 return false;
7015
7016 LoPart = Op0.getOperand(0);
7017 HiPart = Op1.getOperand(0);
7018 return true;
7019}
7020
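// Likewise for an f128 value built by a REG_SEQUENCE that inserts LoPart
// into subreg_l64 and HiPart into subreg_h64 of an FP128 register.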
7021static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
7022 SDValue &HiPart) {
7023 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
7024 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
7025 return false;
7026
7027 if (Val->getNumOperands() != 5 ||
7028 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
7029 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
7030 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
7031 return false;
7032
7033 LoPart = Val->getOperand(1);
7034 HiPart = Val->getOperand(3);
7035 return true;
7036}
7037
7038SDValue SystemZTargetLowering::combineSTORE(
7039 SDNode *N, DAGCombinerInfo &DCI) const {
7040 SelectionDAG &DAG = DCI.DAG;
7041 auto *SN = cast<StoreSDNode>(N);
7042 auto &Op1 = N->getOperand(1);
7043 EVT MemVT = SN->getMemoryVT();
7044 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
7045 // for the extraction to be done on a vMiN value, so that we can use VSTE.
7046 // If X has wider elements then convert it to:
7047 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
7048 if (MemVT.isInteger() && SN->isTruncatingStore()) {
7049 if (SDValue Value =
7050 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
7051 DCI.AddToWorklist(Value.getNode());
7052
7053 // Rewrite the store with the new form of stored value.
7054 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
7055 SN->getBasePtr(), SN->getMemoryVT(),
7056 SN->getMemOperand());
7057 }
7058 }
7059 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
7060 if (!SN->isTruncatingStore() &&
7061 Op1.getOpcode() == ISD::BSWAP &&
7062 Op1.getNode()->hasOneUse() &&
7063 canLoadStoreByteSwapped(Op1.getValueType())) {
7064
7065 SDValue BSwapOp = Op1.getOperand(0);
7066
7067 if (BSwapOp.getValueType() == MVT::i16)
7068 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
7069
7070 SDValue Ops[] = {
7071 N->getOperand(0), BSwapOp, N->getOperand(2)
7072 };
7073
7074 return
7075 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
7076 Ops, MemVT, SN->getMemOperand());
7077 }
7078 // Combine STORE (element-swap) into VSTER
7079 if (!SN->isTruncatingStore() &&
7080 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
7081 Op1.getNode()->hasOneUse() &&
7082 Subtarget.hasVectorEnhancements2()) {
7083 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
7084 ArrayRef<int> ShuffleMask = SVN->getMask();
7085 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
7086 SDValue Ops[] = {
7087 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
7088 };
7089
7090 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
7091 DAG.getVTList(MVT::Other),
7092 Ops, MemVT, SN->getMemOperand());
7093 }
7094 }
7095
7096 // Combine STORE (READCYCLECOUNTER) into STCKF.
7097 if (!SN->isTruncatingStore() &&
7098 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
7099 Op1.hasOneUse() &&
7100 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
7101 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
7102 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
7103 DAG.getVTList(MVT::Other),
7104 Ops, MemVT, SN->getMemOperand());
7105 }
7106
7107 // Transform a store of a 128-bit value moved from parts into two stores.
7108 if (SN->isSimple() && ISD::isNormalStore(SN)) {
7109 SDValue LoPart, HiPart;
7110 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
7111 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
7112 SDLoc DL(SN);
7113 SDValue Chain0 =
7114 DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),
7115 SN->getPointerInfo(), SN->getOriginalAlign(),
7116 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7117 SDValue Chain1 =
7118 DAG.getStore(SN->getChain(), DL, LoPart,
7119 DAG.getObjectPtrOffset(DL, SN->getBasePtr(),
7120 TypeSize::getFixed(8)),
7121 SN->getPointerInfo().getWithOffset(8),
7122 SN->getOriginalAlign(),
7123 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7124
7125 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
7126 }
7127 }
7128
7129 // Replicate a reg or immediate with VREP instead of scalar multiply or
7130 // immediate load. It seems best to do this during the first DAGCombine as
7131 // it is straightforward to handle the zero-extend node in the initial
7132 // DAG, and also not worry about keeping the new MemVT legal (e.g. when
7133 // extracting an i16 element from a v16i8 vector).
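// For example, a store of the i64 constant 0x00c300c300c300c3 becomes a
// v4i16 vector store of the replicated element 0x00c3.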
7134 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
7135 isOnlyUsedByStores(Op1, DAG)) {
7136 SDValue Word = SDValue();
7137 EVT WordVT;
7138
7139 // Find a replicated immediate and return it if found in Word and its
7140 // type in WordVT.
7141 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
7142 // Some constants are better handled with a scalar store.
7143 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
7144 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
7145 return;
7146 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue()));
7147 if (VCI.isVectorConstantLegal(Subtarget) &&
7148 VCI.Opcode == SystemZISD::REPLICATE) {
7149 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
7150 WordVT = VCI.VecVT.getScalarType();
7151 }
7152 };
7153
7154 // Find a replicated register and return it if found in Word and its type
7155 // in WordVT.
7156 auto FindReplicatedReg = [&](SDValue MulOp) {
7157 EVT MulVT = MulOp.getValueType();
7158 if (MulOp->getOpcode() == ISD::MUL &&
7159 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
7160 // Find a zero extended value and its type.
7161 SDValue LHS = MulOp->getOperand(0);
7162 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
7163 WordVT = LHS->getOperand(0).getValueType();
7164 else if (LHS->getOpcode() == ISD::AssertZext)
7165 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
7166 else
7167 return;
7168 // Find a replicating constant, e.g. 0x00010001.
7169 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
7170 SystemZVectorConstantInfo VCI(
7171 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
7172 if (VCI.isVectorConstantLegal(Subtarget) &&
7173 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
7174 WordVT == VCI.VecVT.getScalarType())
7175 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
7176 }
7177 }
7178 };
7179
7180 if (isa<BuildVectorSDNode>(Op1) &&
7181 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
7182 SDValue SplatVal = Op1->getOperand(0);
7183 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
7184 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
7185 else
7186 FindReplicatedReg(SplatVal);
7187 } else {
7188 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
7189 FindReplicatedImm(C, MemVT.getStoreSize());
7190 else
7191 FindReplicatedReg(Op1);
7192 }
7193
7194 if (Word != SDValue()) {
7195 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
7196 "Bad type handling");
7197 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
7198 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
7199 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
7200 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
7201 SN->getBasePtr(), SN->getMemOperand());
7202 }
7203 }
7204
7205 return SDValue();
7206}
7207
7208SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
7209 SDNode *N, DAGCombinerInfo &DCI) const {
7210 SelectionDAG &DAG = DCI.DAG;
7211 // Combine element-swap (LOAD) into VLER
7212 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7213 N->getOperand(0).hasOneUse() &&
7214 Subtarget.hasVectorEnhancements2()) {
7215 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
7216 ArrayRef<int> ShuffleMask = SVN->getMask();
7217 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
7218 SDValue Load = N->getOperand(0);
7219 LoadSDNode *LD = cast<LoadSDNode>(Load);
7220
7221 // Create the element-swapping load.
7222 SDValue Ops[] = {
7223 LD->getChain(), // Chain
7224 LD->getBasePtr() // Ptr
7225 };
7226 SDValue ESLoad =
7227 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
7228 DAG.getVTList(LD->getValueType(0), MVT::Other),
7229 Ops, LD->getMemoryVT(), LD->getMemOperand());
7230
7231 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
7232 // by the load dead.
7233 DCI.CombineTo(N, ESLoad);
7234
7235 // Next, combine the load away, we give it a bogus result value but a real
7236 // chain result. The result value is dead because the shuffle is dead.
7237 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
7238
7239 // Return N so it doesn't get rechecked!
7240 return SDValue(N, 0);
7241 }
7242 }
7243
7244 return SDValue();
7245}
7246
7247SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
7248 SDNode *N, DAGCombinerInfo &DCI) const {
7249 SelectionDAG &DAG = DCI.DAG;
7250
7251 if (!Subtarget.hasVector())
7252 return SDValue();
7253
7254 // Look through bitcasts that retain the number of vector elements.
7255 SDValue Op = N->getOperand(0);
7256 if (Op.getOpcode() == ISD::BITCAST &&
7257 Op.getValueType().isVector() &&
7258 Op.getOperand(0).getValueType().isVector() &&
7259 Op.getValueType().getVectorNumElements() ==
7260 Op.getOperand(0).getValueType().getVectorNumElements())
7261 Op = Op.getOperand(0);
7262
7263 // Pull BSWAP out of a vector extraction.
7264 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
7265 EVT VecVT = Op.getValueType();
7266 EVT EltVT = VecVT.getVectorElementType();
7267 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
7268 Op.getOperand(0), N->getOperand(1));
7269 DCI.AddToWorklist(Op.getNode());
7270 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
7271 if (EltVT != N->getValueType(0)) {
7272 DCI.AddToWorklist(Op.getNode());
7273 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
7274 }
7275 return Op;
7276 }
7277
7278 // Try to simplify a vector extraction.
7279 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
7280 SDValue Op0 = N->getOperand(0);
7281 EVT VecVT = Op0.getValueType();
7282 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
7283 IndexN->getZExtValue(), DCI, false);
7284 }
7285 return SDValue();
7286}
7287
7288SDValue SystemZTargetLowering::combineJOIN_DWORDS(
7289 SDNode *N, DAGCombinerInfo &DCI) const {
7290 SelectionDAG &DAG = DCI.DAG;
7291 // (join_dwords X, X) == (replicate X)
7292 if (N->getOperand(0) == N->getOperand(1))
7293 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
7294 N->getOperand(0));
7295 return SDValue();
7296}
7297
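// Return the common input chain of the two strict FP nodes N1 and N2 if
// they use the same one; otherwise return a null SDValue.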
7298static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
7299 SDValue Chain1 = N1->getOperand(0);
7300 SDValue Chain2 = N2->getOperand(0);
7301
7302 // Trivial case: both nodes take the same chain.
7303 if (Chain1 == Chain2)
7304 return Chain1;
7305
7306 // FIXME - we could handle more complex cases via TokenFactor,
7307 // assuming we can verify that this would not create a cycle.
7308 return SDValue();
7309}
7310
7311SDValue SystemZTargetLowering::combineFP_ROUND(
7312 SDNode *N, DAGCombinerInfo &DCI) const {
7313
7314 if (!Subtarget.hasVector())
7315 return SDValue();
7316
7317 // (fpround (extract_vector_elt X 0))
7318 // (fpround (extract_vector_elt X 1)) ->
7319 // (extract_vector_elt (VROUND X) 0)
7320 // (extract_vector_elt (VROUND X) 2)
7321 //
7322 // This is a special case since the target doesn't really support v2f32s.
7323 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7324 SelectionDAG &DAG = DCI.DAG;
7325 SDValue Op0 = N->getOperand(OpNo);
7326 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
7327 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7328 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
7329 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7330 Op0.getConstantOperandVal(1) == 0) {
7331 SDValue Vec = Op0.getOperand(0);
7332 for (auto *U : Vec->uses()) {
7333 if (U != Op0.getNode() && U->hasOneUse() &&
7334 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7335 U->getOperand(0) == Vec &&
7336 U->getOperand(1).getOpcode() == ISD::Constant &&
7337 U->getConstantOperandVal(1) == 1) {
7338 SDValue OtherRound = SDValue(*U->use_begin(), 0);
7339 if (OtherRound.getOpcode() == N->getOpcode() &&
7340 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
7341 OtherRound.getValueType() == MVT::f32) {
7342 SDValue VRound, Chain;
7343 if (N->isStrictFPOpcode()) {
7344 Chain = MergeInputChains(N, OtherRound.getNode());
7345 if (!Chain)
7346 continue;
7347 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
7348 {MVT::v4f32, MVT::Other}, {Chain, Vec});
7349 Chain = VRound.getValue(1);
7350 } else
7351 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
7352 MVT::v4f32, Vec);
7353 DCI.AddToWorklist(VRound.getNode());
7354 SDValue Extract1 =
7355 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
7356 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
7357 DCI.AddToWorklist(Extract1.getNode());
7358 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
7359 if (Chain)
7360 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
7361 SDValue Extract0 =
7362 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
7363 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7364 if (Chain)
7365 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7366 N->getVTList(), Extract0, Chain);
7367 return Extract0;
7368 }
7369 }
7370 }
7371 }
7372 return SDValue();
7373}
7374
7375SDValue SystemZTargetLowering::combineFP_EXTEND(
7376 SDNode *N, DAGCombinerInfo &DCI) const {
7377
7378 if (!Subtarget.hasVector())
7379 return SDValue();
7380
7381 // (fpextend (extract_vector_elt X 0))
7382 // (fpextend (extract_vector_elt X 2)) ->
7383 // (extract_vector_elt (VEXTEND X) 0)
7384 // (extract_vector_elt (VEXTEND X) 1)
7385 //
7386 // This is a special case since the target doesn't really support v2f32s.
7387 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7388 SelectionDAG &DAG = DCI.DAG;
7389 SDValue Op0 = N->getOperand(OpNo);
7390 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
7391 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7392 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
7393 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7394 Op0.getConstantOperandVal(1) == 0) {
7395 SDValue Vec = Op0.getOperand(0);
7396 for (auto *U : Vec->uses()) {
7397 if (U != Op0.getNode() && U->hasOneUse() &&
7398 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7399 U->getOperand(0) == Vec &&
7400 U->getOperand(1).getOpcode() == ISD::Constant &&
7401 U->getConstantOperandVal(1) == 2) {
7402 SDValue OtherExtend = SDValue(*U->use_begin(), 0);
7403 if (OtherExtend.getOpcode() == N->getOpcode() &&
7404 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
7405 OtherExtend.getValueType() == MVT::f64) {
7406 SDValue VExtend, Chain;
7407 if (N->isStrictFPOpcode()) {
7408 Chain = MergeInputChains(N, OtherExtend.getNode());
7409 if (!Chain)
7410 continue;
7411 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
7412 {MVT::v2f64, MVT::Other}, {Chain, Vec});
7413 Chain = VExtend.getValue(1);
7414 } else
7415 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
7416 MVT::v2f64, Vec);
7417 DCI.AddToWorklist(VExtend.getNode());
7418 SDValue Extract1 =
7419 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
7420 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
7421 DCI.AddToWorklist(Extract1.getNode());
7422 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
7423 if (Chain)
7424 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
7425 SDValue Extract0 =
7426 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
7427 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7428 if (Chain)
7429 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7430 N->getVTList(), Extract0, Chain);
7431 return Extract0;
7432 }
7433 }
7434 }
7435 }
7436 return SDValue();
7437}
7438
7439SDValue SystemZTargetLowering::combineINT_TO_FP(
7440 SDNode *N, DAGCombinerInfo &DCI) const {
7441 if (DCI.Level != BeforeLegalizeTypes)
7442 return SDValue();
7443 SelectionDAG &DAG = DCI.DAG;
7444 LLVMContext &Ctx = *DAG.getContext();
7445 unsigned Opcode = N->getOpcode();
7446 EVT OutVT = N->getValueType(0);
7447 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
7448 SDValue Op = N->getOperand(0);
7449 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
7450 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
7451
7452 // Insert an extension before type-legalization to avoid scalarization, e.g.:
7453 // v2f64 = uint_to_fp v2i16
7454 // =>
7455 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
7456 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
7457 OutScalarBits <= 64) {
7458 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
7459 EVT ExtVT = EVT::getVectorVT(
7460 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
7461 unsigned ExtOpcode =
7462 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
7463 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
7464 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
7465 }
7466 return SDValue();
7467}
7468
7469SDValue SystemZTargetLowering::combineBSWAP(
7470 SDNode *N, DAGCombinerInfo &DCI) const {
7471 SelectionDAG &DAG = DCI.DAG;
7472 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
7473 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7474 N->getOperand(0).hasOneUse() &&
7475 canLoadStoreByteSwapped(N->getValueType(0))) {
7476 SDValue Load = N->getOperand(0);
7477 LoadSDNode *LD = cast<LoadSDNode>(Load);
7478
7479 // Create the byte-swapping load.
7480 SDValue Ops[] = {
7481 LD->getChain(), // Chain
7482 LD->getBasePtr() // Ptr
7483 };
7484 EVT LoadVT = N->getValueType(0);
7485 if (LoadVT == MVT::i16)
7486 LoadVT = MVT::i32;
7487 SDValue BSLoad =
7488 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
7489 DAG.getVTList(LoadVT, MVT::Other),
7490 Ops, LD->getMemoryVT(), LD->getMemOperand());
7491
7492 // If this is an i16 load, insert the truncate.
7493 SDValue ResVal = BSLoad;
7494 if (N->getValueType(0) == MVT::i16)
7495 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
7496
7497 // First, combine the bswap away. This makes the value produced by the
7498 // load dead.
7499 DCI.CombineTo(N, ResVal);
7500
7501 // Next, combine the load away, we give it a bogus result value but a real
7502 // chain result. The result value is dead because the bswap is dead.
7503 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
7504
7505 // Return N so it doesn't get rechecked!
7506 return SDValue(N, 0);
7507 }
7508
7509 // Look through bitcasts that retain the number of vector elements.
7510 SDValue Op = N->getOperand(0);
7511 if (Op.getOpcode() == ISD::BITCAST &&
7512 Op.getValueType().isVector() &&
7513 Op.getOperand(0).getValueType().isVector() &&
7514 Op.getValueType().getVectorNumElements() ==
7515 Op.getOperand(0).getValueType().getVectorNumElements())
7516 Op = Op.getOperand(0);
7517
7518 // Push BSWAP into a vector insertion if at least one side then simplifies.
7519 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
7520 SDValue Vec = Op.getOperand(0);
7521 SDValue Elt = Op.getOperand(1);
7522 SDValue Idx = Op.getOperand(2);
7523
7524 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
7525 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
7526 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
7527 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
7528 (canLoadStoreByteSwapped(N->getValueType(0)) &&
7529 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
7530 EVT VecVT = N->getValueType(0);
7531 EVT EltVT = N->getValueType(0).getVectorElementType();
7532 if (VecVT != Vec.getValueType()) {
7533 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
7534 DCI.AddToWorklist(Vec.getNode());
7535 }
7536 if (EltVT != Elt.getValueType()) {
7537 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
7538 DCI.AddToWorklist(Elt.getNode());
7539 }
7540 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
7541 DCI.AddToWorklist(Vec.getNode());
7542 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
7543 DCI.AddToWorklist(Elt.getNode());
7544 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
7545 Vec, Elt, Idx);
7546 }
7547 }
7548
7549 // Push BSWAP into a vector shuffle if at least one side then simplifies.
7550 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
7551 if (SV && Op.hasOneUse()) {
7552 SDValue Op0 = Op.getOperand(0);
7553 SDValue Op1 = Op.getOperand(1);
7554
7555 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
7556 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
7557 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
7558 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
7559 EVT VecVT = N->getValueType(0);
7560 if (VecVT != Op0.getValueType()) {
7561 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
7562 DCI.AddToWorklist(Op0.getNode());
7563 }
7564 if (VecVT != Op1.getValueType()) {
7565 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
7566 DCI.AddToWorklist(Op1.getNode());
7567 }
7568 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
7569 DCI.AddToWorklist(Op0.getNode());
7570 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
7571 DCI.AddToWorklist(Op1.getNode());
7572 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
7573 }
7574 }
7575
7576 return SDValue();
7577}
7578
7579static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
7580 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
7581 // set by the CCReg instruction using the CCValid / CCMask masks.
7582 // If the CCReg instruction is itself an ICMP testing the condition
7583 // code set by some other instruction, see whether we can directly
7584 // use that condition code.
7585
7586 // Verify that we have an ICMP against some constant.
7587 if (CCValid != SystemZ::CCMASK_ICMP)
7588 return false;
7589 auto *ICmp = CCReg.getNode();
7590 if (ICmp->getOpcode() != SystemZISD::ICMP)
7591 return false;
7592 auto *CompareLHS = ICmp->getOperand(0).getNode();
7593 auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
7594 if (!CompareRHS)
7595 return false;
7596
7597 // Optimize the case where CompareLHS is a SELECT_CCMASK.
7598 if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
7599 // Verify that we have an appropriate mask for an EQ or NE comparison.
7600 bool Invert = false;
7601 if (CCMask == SystemZ::CCMASK_CMP_NE)
7602 Invert = !Invert;
7603 else if (CCMask != SystemZ::CCMASK_CMP_EQ)
7604 return false;
7605
7606 // Verify that the ICMP compares against one of the select values.
7607 auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
7608 if (!TrueVal)
7609 return false;
7610 auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7611 if (!FalseVal)
7612 return false;
7613 if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
7614 Invert = !Invert;
7615 else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
7616 return false;
7617
7618 // Compute the effective CC mask for the new branch or select.
7619 auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
7620 auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
7621 if (!NewCCValid || !NewCCMask)
7622 return false;
7623 CCValid = NewCCValid->getZExtValue();
7624 CCMask = NewCCMask->getZExtValue();
7625 if (Invert)
7626 CCMask ^= CCValid;
7627
7628 // Return the updated CCReg link.
7629 CCReg = CompareLHS->getOperand(4);
7630 return true;
7631 }
7632
7633 // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
7634 if (CompareLHS->getOpcode() == ISD::SRA) {
7635 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7636 if (!SRACount || SRACount->getZExtValue() != 30)
7637 return false;
7638 auto *SHL = CompareLHS->getOperand(0).getNode();
7639 if (SHL->getOpcode() != ISD::SHL)
7640 return false;
7641 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
7642 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
7643 return false;
7644 auto *IPM = SHL->getOperand(0).getNode();
7645 if (IPM->getOpcode() != SystemZISD::IPM)
7646 return false;
7647
7648 // Avoid introducing CC spills (because SRA would clobber CC).
7649 if (!CompareLHS->hasOneUse())
7650 return false;
7651 // Verify that the ICMP compares against zero.
7652 if (CompareRHS->getZExtValue() != 0)
7653 return false;
7654
7655 // Compute the effective CC mask for the new branch or select.
7656 CCMask = SystemZ::reverseCCMask(CCMask);
7657
7658 // Return the updated CCReg link.
7659 CCReg = IPM->getOperand(0);
7660 return true;
7661 }
7662
7663 return false;
7664}
7665
7666SDValue SystemZTargetLowering::combineBR_CCMASK(
7667 SDNode *N, DAGCombinerInfo &DCI) const {
7668 SelectionDAG &DAG = DCI.DAG;
7669
7670 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
7671 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7672 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7673 if (!CCValid || !CCMask)
7674 return SDValue();
7675
7676 int CCValidVal = CCValid->getZExtValue();
7677 int CCMaskVal = CCMask->getZExtValue();
7678 SDValue Chain = N->getOperand(0);
7679 SDValue CCReg = N->getOperand(4);
7680
7681 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7682 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
7683 Chain,
7684 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7685 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7686 N->getOperand(3), CCReg);
7687 return SDValue();
7688}
7689
7690SDValue SystemZTargetLowering::combineSELECT_CCMASK(
7691 SDNode *N, DAGCombinerInfo &DCI) const {
7692 SelectionDAG &DAG = DCI.DAG;
7693
7694 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
7695 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
7696 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
7697 if (!CCValid || !CCMask)
7698 return SDValue();
7699
7700 int CCValidVal = CCValid->getZExtValue();
7701 int CCMaskVal = CCMask->getZExtValue();
7702 SDValue CCReg = N->getOperand(4);
7703
7704 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7705 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
7706 N->getOperand(0), N->getOperand(1),
7707 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7708 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7709 CCReg);
7710 return SDValue();
7711}
7712
7713
7714SDValue SystemZTargetLowering::combineGET_CCMASK(
7715 SDNode *N, DAGCombinerInfo &DCI) const {
7716
7717 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
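// GET_CCMASK recreates CC from the boolean result of a SELECT_CCMASK; if
// that select merely materializes the condition as 0/1 (or 1/0) with a
// compatible mask, the CC register feeding the select can be used directly.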
7718 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7719 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7720 if (!CCValid || !CCMask)
7721 return SDValue();
7722 int CCValidVal = CCValid->getZExtValue();
7723 int CCMaskVal = CCMask->getZExtValue();
7724
7725 SDValue Select = N->getOperand(0);
7726 if (Select->getOpcode() == ISD::TRUNCATE)
7727 Select = Select->getOperand(0);
7728 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
7729 return SDValue();
7730
7731 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
7732 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
7733 if (!SelectCCValid || !SelectCCMask)
7734 return SDValue();
7735 int SelectCCValidVal = SelectCCValid->getZExtValue();
7736 int SelectCCMaskVal = SelectCCMask->getZExtValue();
7737
7738 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
7739 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
7740 if (!TrueVal || !FalseVal)
7741 return SDValue();
7742 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
7743 ;
7744 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
7745 SelectCCMaskVal ^= SelectCCValidVal;
7746 else
7747 return SDValue();
7748
7749 if (SelectCCValidVal & ~CCValidVal)
7750 return SDValue();
7751 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
7752 return SDValue();
7753
7754 return Select->getOperand(4);
7755}
7756
7757SDValue SystemZTargetLowering::combineIntDIVREM(
7758 SDNode *N, DAGCombinerInfo &DCI) const {
7759 SelectionDAG &DAG = DCI.DAG;
7760 EVT VT = N->getValueType(0);
7761 // In the case where the divisor is a vector of constants a cheaper
7762 // sequence of instructions can replace the divide. BuildSDIV is called to
7763 // do this during DAG combining, but it only succeeds when it can build a
7764 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
7765 // since it is not Legal but Custom it can only happen before
7766 // legalization. Therefore we must scalarize this early, in the first DAG combine.
7767 // For widened vectors, this is already the result of type legalization.
7768 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
7769 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
7770 return DAG.UnrollVectorOp(N);
7771 return SDValue();
7772}
7773
7774SDValue SystemZTargetLowering::combineINTRINSIC(
7775 SDNode *N, DAGCombinerInfo &DCI) const {
7776 SelectionDAG &DAG = DCI.DAG;
7777
7778 unsigned Id = N->getConstantOperandVal(1);
7779 switch (Id) {
7780 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
7781 // or larger is simply a vector load.
7782 case Intrinsic::s390_vll:
7783 case Intrinsic::s390_vlrl:
7784 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
7785 if (C->getZExtValue() >= 15)
7786 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
7787 N->getOperand(3), MachinePointerInfo());
7788 break;
7789 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
7790 case Intrinsic::s390_vstl:
7791 case Intrinsic::s390_vstrl:
7792 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
7793 if (C->getZExtValue() >= 15)
7794 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
7795 N->getOperand(4), MachinePointerInfo());
7796 break;
7797 }
7798
7799 return SDValue();
7800}
7801
7802SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
7803 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
7804 return N->getOperand(0);
7805 return N;
7806}
7807
7808SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
7809 DAGCombinerInfo &DCI) const {
7810 switch(N->getOpcode()) {
7811 default: break;
7812 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
7813 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
7814 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
7815 case SystemZISD::MERGE_HIGH:
7816 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
7817 case ISD::LOAD: return combineLOAD(N, DCI);
7818 case ISD::STORE: return combineSTORE(N, DCI);
7819 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
7820 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
7821 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
7822 case ISD::STRICT_FP_ROUND:
7823 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
7824 case ISD::STRICT_FP_EXTEND:
7825 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
7826 case ISD::SINT_TO_FP:
7827 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
7828 case ISD::BSWAP: return combineBSWAP(N, DCI);
7829 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
7830 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
7831 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
7832 case ISD::SDIV:
7833 case ISD::UDIV:
7834 case ISD::SREM:
7835 case ISD::UREM: return combineIntDIVREM(N, DCI);
7836 case ISD::INTRINSIC_W_CHAIN:
7837 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
7838 }
7839
7840 return SDValue();
7841}
7842
7843// Return the demanded elements for the OpNo source operand of Op. DemandedElts
7844// are for Op.
7845static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
7846 unsigned OpNo) {
7847 EVT VT = Op.getValueType();
7848 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
7849 APInt SrcDemE;
7850 unsigned Opcode = Op.getOpcode();
7851 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7852 unsigned Id = Op.getConstantOperandVal(0);
7853 switch (Id) {
7854 case Intrinsic::s390_vpksh: // PACKS
7855 case Intrinsic::s390_vpksf:
7856 case Intrinsic::s390_vpksg:
7857 case Intrinsic::s390_vpkshs: // PACKS_CC
7858 case Intrinsic::s390_vpksfs:
7859 case Intrinsic::s390_vpksgs:
7860 case Intrinsic::s390_vpklsh: // PACKLS
7861 case Intrinsic::s390_vpklsf:
7862 case Intrinsic::s390_vpklsg:
7863 case Intrinsic::s390_vpklshs: // PACKLS_CC
7864 case Intrinsic::s390_vpklsfs:
7865 case Intrinsic::s390_vpklsgs:
7866 // VECTOR PACK truncates the elements of two source vectors into one.
7867 SrcDemE = DemandedElts;
7868 if (OpNo == 2)
7869 SrcDemE.lshrInPlace(NumElts / 2);
7870 SrcDemE = SrcDemE.trunc(NumElts / 2);
7871 break;
7872 // VECTOR UNPACK extends half the elements of the source vector.
7873 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7874 case Intrinsic::s390_vuphh:
7875 case Intrinsic::s390_vuphf:
7876 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7877 case Intrinsic::s390_vuplhh:
7878 case Intrinsic::s390_vuplhf:
7879 SrcDemE = APInt(NumElts * 2, 0);
7880 SrcDemE.insertBits(DemandedElts, 0);
7881 break;
7882 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7883 case Intrinsic::s390_vuplhw:
7884 case Intrinsic::s390_vuplf:
7885 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7886 case Intrinsic::s390_vupllh:
7887 case Intrinsic::s390_vupllf:
7888 SrcDemE = APInt(NumElts * 2, 0);
7889 SrcDemE.insertBits(DemandedElts, NumElts);
7890 break;
7891 case Intrinsic::s390_vpdi: {
7892 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
7893 SrcDemE = APInt(NumElts, 0);
7894 if (!DemandedElts[OpNo - 1])
7895 break;
7896 unsigned Mask = Op.getConstantOperandVal(3);
7897 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
7898 // Demand input element 0 or 1, given by the mask bit value.
7899 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
7900 break;
7901 }
7902 case Intrinsic::s390_vsldb: {
7903 // VECTOR SHIFT LEFT DOUBLE BY BYTE
7904 assert(VT == MVT::v16i8 && "Unexpected type.");
7905 unsigned FirstIdx = Op.getConstantOperandVal(3);
7906 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
7907 unsigned NumSrc0Els = 16 - FirstIdx;
7908 SrcDemE = APInt(NumElts, 0);
7909 if (OpNo == 1) {
7910 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
7911 SrcDemE.insertBits(DemEls, FirstIdx);
7912 } else {
7913 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
7914 SrcDemE.insertBits(DemEls, 0);
7915 }
7916 break;
7917 }
7918 case Intrinsic::s390_vperm:
7919 SrcDemE = APInt(NumElts, -1);
7920 break;
7921 default:
7922 llvm_unreachable("Unhandled intrinsic.");
7923 break;
7924 }
7925 } else {
7926 switch (Opcode) {
7927 case SystemZISD::JOIN_DWORDS:
7928 // Scalar operand.
7929 SrcDemE = APInt(1, 1);
7930 break;
7931 case SystemZISD::SELECT_CCMASK:
7932 SrcDemE = DemandedElts;
7933 break;
7934 default:
7935 llvm_unreachable("Unhandled opcode.");
7936 break;
7937 }
7938 }
7939 return SrcDemE;
7940}
7941
7942static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
7943 const APInt &DemandedElts,
7944 const SelectionDAG &DAG, unsigned Depth,
7945 unsigned OpNo) {
7946 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
7947 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
7948 KnownBits LHSKnown =
7949 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
7950 KnownBits RHSKnown =
7951 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
7952 Known = LHSKnown.intersectWith(RHSKnown);
7953}
7954
7955void
7956SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
7957 KnownBits &Known,
7958 const APInt &DemandedElts,
7959 const SelectionDAG &DAG,
7960 unsigned Depth) const {
7961 Known.resetAll();
7962
7963 // Intrinsic CC result is returned in the two low bits.
7964 unsigned tmp0, tmp1; // not used
7965 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
7966 Known.Zero.setBitsFrom(2);
7967 return;
7968 }
7969 EVT VT = Op.getValueType();
7970 if (Op.getResNo() != 0 || VT == MVT::Untyped)
7971 return;
7972 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
7973 "KnownBits does not match VT in bitwidth");
7974 assert ((!VT.isVector() ||
7975 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
7976 "DemandedElts does not match VT number of elements");
7977 unsigned BitWidth = Known.getBitWidth();
7978 unsigned Opcode = Op.getOpcode();
7979 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7980 bool IsLogical = false;
7981 unsigned Id = Op.getConstantOperandVal(0);
7982 switch (Id) {
7983 case Intrinsic::s390_vpksh: // PACKS
7984 case Intrinsic::s390_vpksf:
7985 case Intrinsic::s390_vpksg:
7986 case Intrinsic::s390_vpkshs: // PACKS_CC
7987 case Intrinsic::s390_vpksfs:
7988 case Intrinsic::s390_vpksgs:
7989 case Intrinsic::s390_vpklsh: // PACKLS
7990 case Intrinsic::s390_vpklsf:
7991 case Intrinsic::s390_vpklsg:
7992 case Intrinsic::s390_vpklshs: // PACKLS_CC
7993 case Intrinsic::s390_vpklsfs:
7994 case Intrinsic::s390_vpklsgs:
7995 case Intrinsic::s390_vpdi:
7996 case Intrinsic::s390_vsldb:
7997 case Intrinsic::s390_vperm:
7998 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
7999 break;
8000 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
8001 case Intrinsic::s390_vuplhh:
8002 case Intrinsic::s390_vuplhf:
8003 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
8004 case Intrinsic::s390_vupllh:
8005 case Intrinsic::s390_vupllf:
8006 IsLogical = true;
8007 [[fallthrough]];
8008 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8009 case Intrinsic::s390_vuphh:
8010 case Intrinsic::s390_vuphf:
8011 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8012 case Intrinsic::s390_vuplhw:
8013 case Intrinsic::s390_vuplf: {
8014 SDValue SrcOp = Op.getOperand(1);
8015 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
8016 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
8017 if (IsLogical) {
8018 Known = Known.zext(BitWidth);
8019 } else
8020 Known = Known.sext(BitWidth);
8021 break;
8022 }
8023 default:
8024 break;
8025 }
8026 } else {
8027 switch (Opcode) {
8028 case SystemZISD::JOIN_DWORDS:
8029 case SystemZISD::SELECT_CCMASK:
8030 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
8031 break;
8032 case SystemZISD::REPLICATE: {
8033 SDValue SrcOp = Op.getOperand(0);
8034 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
8035 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
8036 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
8037 break;
8038 }
8039 default:
8040 break;
8041 }
8042 }
8043
8044 // Known has the width of the source operand(s). Adjust if needed to match
8045 // the passed bitwidth.
8046 if (Known.getBitWidth() != BitWidth)
8047 Known = Known.anyextOrTrunc(BitWidth);
8048}
8049
8050static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
8051 const SelectionDAG &DAG, unsigned Depth,
8052 unsigned OpNo) {
8053 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
8054 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
8055 if (LHS == 1) return 1; // Early out.
8056 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
8057 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
8058 if (RHS == 1) return 1; // Early out.
8059 unsigned Common = std::min(LHS, RHS);
8060 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
8061 EVT VT = Op.getValueType();
8062 unsigned VTBits = VT.getScalarSizeInBits();
8063 if (SrcBitWidth > VTBits) { // PACK
8064 unsigned SrcExtraBits = SrcBitWidth - VTBits;
8065 if (Common > SrcExtraBits)
8066 return (Common - SrcExtraBits);
8067 return 1;
8068 }
8069 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
8070 return Common;
8071}
8072
8073unsigned
8074SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
8075 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
8076 unsigned Depth) const {
8077 if (Op.getResNo() != 0)
8078 return 1;
8079 unsigned Opcode = Op.getOpcode();
8080 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
8081 unsigned Id = Op.getConstantOperandVal(0);
8082 switch (Id) {
8083 case Intrinsic::s390_vpksh: // PACKS
8084 case Intrinsic::s390_vpksf:
8085 case Intrinsic::s390_vpksg:
8086 case Intrinsic::s390_vpkshs: // PACKS_CC
8087 case Intrinsic::s390_vpksfs:
8088 case Intrinsic::s390_vpksgs:
8089 case Intrinsic::s390_vpklsh: // PACKLS
8090 case Intrinsic::s390_vpklsf:
8091 case Intrinsic::s390_vpklsg:
8092 case Intrinsic::s390_vpklshs: // PACKLS_CC
8093 case Intrinsic::s390_vpklsfs:
8094 case Intrinsic::s390_vpklsgs:
8095 case Intrinsic::s390_vpdi:
8096 case Intrinsic::s390_vsldb:
8097 case Intrinsic::s390_vperm:
8098 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
8099 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8100 case Intrinsic::s390_vuphh:
8101 case Intrinsic::s390_vuphf:
8102 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8103 case Intrinsic::s390_vuplhw:
8104 case Intrinsic::s390_vuplf: {
8105 SDValue PackedOp = Op.getOperand(1);
8106 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
8107 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
8108 EVT VT = Op.getValueType();
8109 unsigned VTBits = VT.getScalarSizeInBits();
8110 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
8111 return Tmp;
8112 }
8113 default:
8114 break;
8115 }
8116 } else {
8117 switch (Opcode) {
8118 case SystemZISD::SELECT_CCMASK:
8119 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
8120 default:
8121 break;
8122 }
8123 }
8124
8125 return 1;
8126}
8127
8128bool SystemZTargetLowering::
8129isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op,
8130 const APInt &DemandedElts, const SelectionDAG &DAG,
8131 bool PoisonOnly, unsigned Depth) const {
8132 switch (Op->getOpcode()) {
8133 case SystemZISD::PCREL_WRAPPER:
8134 case SystemZISD::PCREL_OFFSET:
8135 return true;
8136 }
8137 return false;
8138}
8139
8140unsigned
8141SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
8142 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
8143 unsigned StackAlign = TFI->getStackAlignment();
8144 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
8145 "Unexpected stack alignment");
8146 // The default stack probe size is 4096 if the function has no
8147 // stack-probe-size attribute.
8148 unsigned StackProbeSize =
8149 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
8150 // Round down to the stack alignment.
8151 StackProbeSize &= ~(StackAlign - 1);
8152 return StackProbeSize ? StackProbeSize : StackAlign;
8153}
8154
8155//===----------------------------------------------------------------------===//
8156// Custom insertion
8157//===----------------------------------------------------------------------===//
8158
8159// Force base value Base into a register before MI. Return the register.
8160static Register forceReg(MachineInstr &MI, MachineOperand &Base,
8161 const SystemZInstrInfo *TII) {
8162 MachineBasicBlock *MBB = MI.getParent();
8163 MachineFunction &MF = *MBB->getParent();
8164 MachineRegisterInfo &MRI = MF.getRegInfo();
8165
8166 if (Base.isReg()) {
8167 // Copy Base into a new virtual register to help register coalescing in
8168 // cases with multiple uses.
8169 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8170 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
8171 .add(Base);
8172 return Reg;
8173 }
8174
8175 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8176 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
8177 .add(Base)
8178 .addImm(0)
8179 .addReg(0);
8180 return Reg;
8181}
8182
8183// The CC operand of MI might be missing a kill marker because there
8184// were multiple uses of CC, and ISel didn't know which to mark.
8185// Figure out whether MI should have had a kill marker.
8186static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
8187 // Scan forward through BB for a use/def of CC.
8188 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
8189 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
8190 const MachineInstr& mi = *miI;
8191 if (mi.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
8192 return false;
8193 if (mi.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
8194 break; // Should have kill-flag - update below.
8195 }
8196
8197 // If we hit the end of the block, check whether CC is live into a
8198 // successor.
8199 if (miI == MBB->end()) {
8200 for (const MachineBasicBlock *Succ : MBB->successors())
8201 if (Succ->isLiveIn(SystemZ::CC))
8202 return false;
8203 }
8204
8205 return true;
8206}
8207
8208// Return true if it is OK for this Select pseudo-opcode to be cascaded
8209// together with other Select pseudo-opcodes into a single basic-block with
8210// a conditional jump around it.
8211static bool isSelectPseudo(MachineInstr &MI) {
8212 switch (MI.getOpcode()) {
8213 case SystemZ::Select32:
8214 case SystemZ::Select64:
8215 case SystemZ::Select128:
8216 case SystemZ::SelectF32:
8217 case SystemZ::SelectF64:
8218 case SystemZ::SelectF128:
8219 case SystemZ::SelectVR32:
8220 case SystemZ::SelectVR64:
8221 case SystemZ::SelectVR128:
8222 return true;
8223
8224 default:
8225 return false;
8226 }
8227}
8228
8229// Helper function, which inserts PHI functions into SinkMBB:
8230// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
8231// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
8232static void createPHIsForSelects(SmallVectorImpl<MachineInstr*> &Selects,
8233 MachineBasicBlock *TrueMBB,
8234 MachineBasicBlock *FalseMBB,
8235 MachineBasicBlock *SinkMBB) {
8236 MachineFunction *MF = TrueMBB->getParent();
8237 const SystemZInstrInfo *TII = MF->getSubtarget<SystemZSubtarget>().getInstrInfo();
8238
8239 MachineInstr *FirstMI = Selects.front();
8240 unsigned CCValid = FirstMI->getOperand(3).getImm();
8241 unsigned CCMask = FirstMI->getOperand(4).getImm();
8242
8243 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
8244
8245 // As we are creating the PHIs, we have to be careful if there is more than
8246 // one. Later Selects may reference the results of earlier Selects, but later
8247 // PHIs have to reference the individual true/false inputs from earlier PHIs.
8248 // That also means that PHI construction must work forward from earlier to
8249 // later, and that the code must maintain a mapping from earlier PHI's
8250 // destination registers, and the registers that went into the PHI.
8251 DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
8252
8253 for (auto *MI : Selects) {
8254 Register DestReg = MI->getOperand(0).getReg();
8255 Register TrueReg = MI->getOperand(1).getReg();
8256 Register FalseReg = MI->getOperand(2).getReg();
8257
8258 // If this Select we are generating is the opposite condition from
8259 // the jump we generated, then we have to swap the operands for the
8260 // PHI that is going to be generated.
8261 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
8262 std::swap(TrueReg, FalseReg);
8263
8264 if (RegRewriteTable.contains(TrueReg))
8265 TrueReg = RegRewriteTable[TrueReg].first;
8266
8267 if (RegRewriteTable.contains(FalseReg))
8268 FalseReg = RegRewriteTable[FalseReg].second;
8269
8270 DebugLoc DL = MI->getDebugLoc();
8271 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
8272 .addReg(TrueReg).addMBB(TrueMBB)
8273 .addReg(FalseReg).addMBB(FalseMBB);
8274
8275 // Add this PHI to the rewrite table.
8276 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
8277 }
8278
8279 MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
8280}
8281
8282MachineBasicBlock *
8283SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
8284 MachineBasicBlock *BB) const {
8285 MachineFunction &MF = *BB->getParent();
8286 MachineFrameInfo &MFI = MF.getFrameInfo();
8287 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
8288 assert(TFL->hasReservedCallFrame(MF) &&
8289 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
8290 (void)TFL;
8291 // Get the MaxCallFrameSize value and erase MI since it serves no further
8292 // purpose as the call frame is statically reserved in the prolog. Set
8293 // AdjustsStack as MI is *not* mapped as a frame instruction.
8294 uint32_t NumBytes = MI.getOperand(0).getImm();
8295 if (NumBytes > MFI.getMaxCallFrameSize())
8296 MFI.setMaxCallFrameSize(NumBytes);
8297 MFI.setAdjustsStack(true);
8298
8299 MI.eraseFromParent();
8300 return BB;
8301}
8302
8303// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
8304MachineBasicBlock *
8305SystemZTargetLowering::emitSelect(MachineInstr &MI,
8306 MachineBasicBlock *MBB) const {
8307 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
8308 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8309
8310 unsigned CCValid = MI.getOperand(3).getImm();
8311 unsigned CCMask = MI.getOperand(4).getImm();
8312
8313 // If we have a sequence of Select* pseudo instructions using the
8314 // same condition code value, we want to expand all of them into
8315 // a single pair of basic blocks using the same condition.
8316 SmallVector<MachineInstr*, 8> Selects;
8317 SmallVector<MachineInstr*, 8> DbgValues;
8318 Selects.push_back(&MI);
8319 unsigned Count = 0;
8320 for (MachineInstr &NextMI : llvm::make_range(
8321 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
8322 if (isSelectPseudo(NextMI)) {
8323 assert(NextMI.getOperand(3).getImm() == CCValid &&
8324 "Bad CCValid operands since CC was not redefined.");
8325 if (NextMI.getOperand(4).getImm() == CCMask ||
8326 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
8327 Selects.push_back(&NextMI);
8328 continue;
8329 }
8330 break;
8331 }
8332 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
8333 NextMI.usesCustomInsertionHook())
8334 break;
8335 bool User = false;
8336 for (auto *SelMI : Selects)
8337 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
8338 User = true;
8339 break;
8340 }
8341 if (NextMI.isDebugInstr()) {
8342 if (User) {
8343 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
8344 DbgValues.push_back(&NextMI);
8345 }
8346 } else if (User || ++Count > 20)
8347 break;
8348 }
8349
8350 MachineInstr *LastMI = Selects.back();
8351 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
8352 checkCCKill(*LastMI, MBB));
8353 MachineBasicBlock *StartMBB = MBB;
8354 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
8355 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8356
8357 // Unless CC was killed in the last Select instruction, mark it as
8358 // live-in to both FalseMBB and JoinMBB.
8359 if (!CCKilled) {
8360 FalseMBB->addLiveIn(SystemZ::CC);
8361 JoinMBB->addLiveIn(SystemZ::CC);
8362 }
8363
8364 // StartMBB:
8365 // BRC CCMask, JoinMBB
8366 // # fallthrough to FalseMBB
8367 MBB = StartMBB;
8368 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8369 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8370 MBB->addSuccessor(JoinMBB);
8371 MBB->addSuccessor(FalseMBB);
8372
8373 // FalseMBB:
8374 // # fallthrough to JoinMBB
8375 MBB = FalseMBB;
8376 MBB->addSuccessor(JoinMBB);
8377
8378 // JoinMBB:
8379 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
8380 // ...
8381 MBB = JoinMBB;
8382 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
8383 for (auto *SelMI : Selects)
8384 SelMI->eraseFromParent();
8385
8386 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
8387 for (auto *DbgMI : DbgValues)
8388 MBB->splice(InsertPos, StartMBB, DbgMI);
8389
8390 return JoinMBB;
8391}
8392
8393// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
8394// StoreOpcode is the store to use and Invert says whether the store should
8395// happen when the condition is false rather than true. If a STORE ON
8396// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
8397MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
8399 unsigned StoreOpcode,
8400 unsigned STOCOpcode,
8401 bool Invert) const {
8402 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8403
8404 Register SrcReg = MI.getOperand(0).getReg();
8405 MachineOperand Base = MI.getOperand(1);
8406 int64_t Disp = MI.getOperand(2).getImm();
8407 Register IndexReg = MI.getOperand(3).getReg();
8408 unsigned CCValid = MI.getOperand(4).getImm();
8409 unsigned CCMask = MI.getOperand(5).getImm();
8410 DebugLoc DL = MI.getDebugLoc();
8411
8412 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
8413
8414 // ISel pattern matching also adds a load memory operand of the same
8415 // address, so take special care to find the storing memory operand.
8416 MachineMemOperand *MMO = nullptr;
8417 for (auto *I : MI.memoperands())
8418 if (I->isStore()) {
8419 MMO = I;
8420 break;
8421 }
8422
8423 // Use STOCOpcode if possible. We could use different store patterns in
8424 // order to avoid matching the index register, but the performance trade-offs
8425 // might be more complicated in that case.
8426 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
8427 if (Invert)
8428 CCMask ^= CCValid;
8429
8430 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
8431 .addReg(SrcReg)
8432 .add(Base)
8433 .addImm(Disp)
8434 .addImm(CCValid)
8435 .addImm(CCMask)
8436 .addMemOperand(MMO);
8437
8438 MI.eraseFromParent();
8439 return MBB;
8440 }
8441
8442 // Get the condition needed to branch around the store.
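 // The BRC below branches to JoinMBB to skip the store, so it must test the
 // opposite of the store condition; XOR-ing the mask with CCValid inverts it
 // within the set of CC values the compare can produce.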
8443 if (!Invert)
8444 CCMask ^= CCValid;
8445
8446 MachineBasicBlock *StartMBB = MBB;
8448 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8449
8450 // Unless CC was killed in the CondStore instruction, mark it as
8451 // live-in to both FalseMBB and JoinMBB.
8452 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
8453 !checkCCKill(MI, JoinMBB)) {
8454 FalseMBB->addLiveIn(SystemZ::CC);
8455 JoinMBB->addLiveIn(SystemZ::CC);
8456 }
8457
8458 // StartMBB:
8459 // BRC CCMask, JoinMBB
8460 // # fallthrough to FalseMBB
8461 MBB = StartMBB;
8462 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8463 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8464 MBB->addSuccessor(JoinMBB);
8465 MBB->addSuccessor(FalseMBB);
8466
8467 // FalseMBB:
8468 // store %SrcReg, %Disp(%Index,%Base)
8469 // # fallthrough to JoinMBB
8470 MBB = FalseMBB;
8471 BuildMI(MBB, DL, TII->get(StoreOpcode))
8472 .addReg(SrcReg)
8473 .add(Base)
8474 .addImm(Disp)
8475 .addReg(IndexReg)
8476 .addMemOperand(MMO);
8477 MBB->addSuccessor(JoinMBB);
8478
8479 MI.eraseFromParent();
8480 return JoinMBB;
8481}
8482
8483// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
8485SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
8487 bool Unsigned) const {
8488 MachineFunction &MF = *MBB->getParent();
8489 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8491
8492 // Synthetic instruction to compare 128-bit values.
8493 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
8494 Register Op0 = MI.getOperand(0).getReg();
8495 Register Op1 = MI.getOperand(1).getReg();
8496
8497 MachineBasicBlock *StartMBB = MBB;
8499 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
8500
8501 // StartMBB:
8502 //
8503 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
8504 // Swap the inputs to get:
8505 // CC 1 if high(Op0) > high(Op1)
8506 // CC 2 if high(Op0) < high(Op1)
8507 // CC 0 if high(Op0) == high(Op1)
8508 //
8509 // If CC != 0, we're done, so jump over the next instruction.
8510 //
8511 // VEC[L]G Op1, Op0
8512 // JNE JoinMBB
8513 // # fallthrough to HiEqMBB
8514 MBB = StartMBB;
8515 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
8516 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
8517 .addReg(Op1).addReg(Op0);
8518 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8520 MBB->addSuccessor(JoinMBB);
8521 MBB->addSuccessor(HiEqMBB);
8522
8523 // HiEqMBB:
8524 //
8525 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
8526 // Since we already know the high parts are equal, the CC
8527 // result will only depend on the low parts:
8528 // CC 1 if low(Op0) > low(Op1)
8529 // CC 3 if low(Op0) <= low(Op1)
8530 //
8531 // VCHLGS Tmp, Op0, Op1
8532 // # fallthrough to JoinMBB
8533 MBB = HiEqMBB;
8534 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
8535 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
8536 .addReg(Op0).addReg(Op1);
8537 MBB->addSuccessor(JoinMBB);
8538
8539 // Mark CC as live-in to JoinMBB.
8540 JoinMBB->addLiveIn(SystemZ::CC);
8541
8542 MI.eraseFromParent();
8543 return JoinMBB;
8544}
8545
8546// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
8547// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
8548// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
8549// whether the field should be inverted after performing BinOpcode (e.g. for
8550// NAND).
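// For example, ATOMIC_LOADW_NRi is lowered with BinOpcode == SystemZ::NR and
// Invert == true, producing the bitwise NAND of the old field and Src2.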
8551MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
8552 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
8553 bool Invert) const {
8554 MachineFunction &MF = *MBB->getParent();
8555 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8557
8558 // Extract the operands. Base can be a register or a frame index.
8559 // Src2 can be a register or immediate.
8560 Register Dest = MI.getOperand(0).getReg();
8561 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8562 int64_t Disp = MI.getOperand(2).getImm();
8563 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
8564 Register BitShift = MI.getOperand(4).getReg();
8565 Register NegBitShift = MI.getOperand(5).getReg();
8566 unsigned BitSize = MI.getOperand(6).getImm();
8567 DebugLoc DL = MI.getDebugLoc();
8568
8569 // Get the right opcodes for the displacement.
8570 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8571 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8572 assert(LOpcode && CSOpcode && "Displacement out of range");
8573
8574 // Create virtual registers for temporary results.
8575 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8576 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8577 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8578 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8579 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8580
8581 // Insert a basic block for the main loop.
8582 MachineBasicBlock *StartMBB = MBB;
8584 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8585
8586 // StartMBB:
8587 // ...
8588 // %OrigVal = L Disp(%Base)
8589 // # fall through to LoopMBB
8590 MBB = StartMBB;
8591 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8592 MBB->addSuccessor(LoopMBB);
8593
8594 // LoopMBB:
8595 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
8596 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8597 // %RotatedNewVal = OP %RotatedOldVal, %Src2
8598 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8599 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8600 // JNE LoopMBB
8601 // # fall through to DoneMBB
8602 MBB = LoopMBB;
8603 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8604 .addReg(OrigVal).addMBB(StartMBB)
8605 .addReg(Dest).addMBB(LoopMBB);
8606 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8607 .addReg(OldVal).addReg(BitShift).addImm(0);
8608 if (Invert) {
8609 // Perform the operation normally and then invert every bit of the field.
8610 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8611 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
8612 // XILF with the upper BitSize bits set.
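 // For example, BitSize == 8 yields the immediate 0xFF000000 and BitSize == 16
 // yields 0xFFFF0000, so only the bits of the rotated field are inverted.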
8613 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
8614 .addReg(Tmp).addImm(-1U << (32 - BitSize));
8615 } else if (BinOpcode)
8616 // A simple binary operation.
8617 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
8618 .addReg(RotatedOldVal)
8619 .add(Src2);
8620 else
8621 // Use RISBG to rotate Src2 into position and use it to replace the
8622 // field in RotatedOldVal.
8623 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
8624 .addReg(RotatedOldVal).addReg(Src2.getReg())
8625 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
8626 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8627 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8628 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8629 .addReg(OldVal)
8630 .addReg(NewVal)
8631 .add(Base)
8632 .addImm(Disp);
8633 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8635 MBB->addSuccessor(LoopMBB);
8636 MBB->addSuccessor(DoneMBB);
8637
8638 MI.eraseFromParent();
8639 return DoneMBB;
8640}
8641
8642// Implement EmitInstrWithCustomInserter for subword pseudo
8643// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
8644// instruction that should be used to compare the current field with the
8645// minimum or maximum value. KeepOldMask is the BRC condition-code mask
8646// for when the current field should be kept.
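// For example, ATOMIC_LOADW_UMIN uses CompareOpcode == SystemZ::CLR with
// KeepOldMask == SystemZ::CCMASK_CMP_LE, so the old field is kept whenever it
// is already less than or equal to Src2.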
8647MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
8648 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
8649 unsigned KeepOldMask) const {
8650 MachineFunction &MF = *MBB->getParent();
8651 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8653
8654 // Extract the operands. Base can be a register or a frame index.
8655 Register Dest = MI.getOperand(0).getReg();
8656 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8657 int64_t Disp = MI.getOperand(2).getImm();
8658 Register Src2 = MI.getOperand(3).getReg();
8659 Register BitShift = MI.getOperand(4).getReg();
8660 Register NegBitShift = MI.getOperand(5).getReg();
8661 unsigned BitSize = MI.getOperand(6).getImm();
8662 DebugLoc DL = MI.getDebugLoc();
8663
8664 // Get the right opcodes for the displacement.
8665 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8666 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8667 assert(LOpcode && CSOpcode && "Displacement out of range");
8668
8669 // Create virtual registers for temporary results.
8670 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8671 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8672 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8673 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8674 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8675 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8676
8677 // Insert 3 basic blocks for the loop.
8678 MachineBasicBlock *StartMBB = MBB;
8680 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8681 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
8682 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
8683
8684 // StartMBB:
8685 // ...
8686 // %OrigVal = L Disp(%Base)
8687 // # fall through to LoopMBB
8688 MBB = StartMBB;
8689 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8690 MBB->addSuccessor(LoopMBB);
8691
8692 // LoopMBB:
8693 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
8694 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8695 // CompareOpcode %RotatedOldVal, %Src2
8696 // BRC KeepOldMask, UpdateMBB
8697 MBB = LoopMBB;
8698 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8699 .addReg(OrigVal).addMBB(StartMBB)
8700 .addReg(Dest).addMBB(UpdateMBB);
8701 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8702 .addReg(OldVal).addReg(BitShift).addImm(0);
8703 BuildMI(MBB, DL, TII->get(CompareOpcode))
8704 .addReg(RotatedOldVal).addReg(Src2);
8705 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8706 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
8707 MBB->addSuccessor(UpdateMBB);
8708 MBB->addSuccessor(UseAltMBB);
8709
8710 // UseAltMBB:
8711 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
8712 // # fall through to UpdateMBB
8713 MBB = UseAltMBB;
8714 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
8715 .addReg(RotatedOldVal).addReg(Src2)
8716 .addImm(32).addImm(31 + BitSize).addImm(0);
8717 MBB->addSuccessor(UpdateMBB);
8718
8719 // UpdateMBB:
8720 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
8721 // [ %RotatedAltVal, UseAltMBB ]
8722 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8723 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8724 // JNE LoopMBB
8725 // # fall through to DoneMBB
8726 MBB = UpdateMBB;
8727 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
8728 .addReg(RotatedOldVal).addMBB(LoopMBB)
8729 .addReg(RotatedAltVal).addMBB(UseAltMBB);
8730 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8731 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8732 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8733 .addReg(OldVal)
8734 .addReg(NewVal)
8735 .add(Base)
8736 .addImm(Disp);
8737 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8739 MBB->addSuccessor(LoopMBB);
8740 MBB->addSuccessor(DoneMBB);
8741
8742 MI.eraseFromParent();
8743 return DoneMBB;
8744}
8745
8746// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
8747// instruction MI.
8749SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
8750 MachineBasicBlock *MBB) const {
8751 MachineFunction &MF = *MBB->getParent();
8752 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8754
8755 // Extract the operands. Base can be a register or a frame index.
8756 Register Dest = MI.getOperand(0).getReg();
8757 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8758 int64_t Disp = MI.getOperand(2).getImm();
8759 Register CmpVal = MI.getOperand(3).getReg();
8760 Register OrigSwapVal = MI.getOperand(4).getReg();
8761 Register BitShift = MI.getOperand(5).getReg();
8762 Register NegBitShift = MI.getOperand(6).getReg();
8763 int64_t BitSize = MI.getOperand(7).getImm();
8764 DebugLoc DL = MI.getDebugLoc();
8765
8766 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
8767
8768 // Get the right opcodes for the displacement and zero-extension.
8769 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8770 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8771 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
8772 assert(LOpcode && CSOpcode && "Displacement out of range");
8773
8774 // Create virtual registers for temporary results.
8775 Register OrigOldVal = MRI.createVirtualRegister(RC);
8776 Register OldVal = MRI.createVirtualRegister(RC);
8777 Register SwapVal = MRI.createVirtualRegister(RC);
8778 Register StoreVal = MRI.createVirtualRegister(RC);
8779 Register OldValRot = MRI.createVirtualRegister(RC);
8780 Register RetryOldVal = MRI.createVirtualRegister(RC);
8781 Register RetrySwapVal = MRI.createVirtualRegister(RC);
8782
8783 // Insert 2 basic blocks for the loop.
8784 MachineBasicBlock *StartMBB = MBB;
8786 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8787 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
8788
8789 // StartMBB:
8790 // ...
8791 // %OrigOldVal = L Disp(%Base)
8792 // # fall through to LoopMBB
8793 MBB = StartMBB;
8794 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
8795 .add(Base)
8796 .addImm(Disp)
8797 .addReg(0);
8798 MBB->addSuccessor(LoopMBB);
8799
8800 // LoopMBB:
8801 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
8802 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
8803 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
8804 // ^^ The low BitSize bits contain the field
8805 // of interest.
8806 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
8807 // ^^ Replace the upper 32-BitSize bits of the
8808 // swap value with those that we loaded and rotated.
8809 // %Dest = LL[CH] %OldValRot
8810 // CR %Dest, %CmpVal
8811 // JNE DoneMBB
8812 // # Fall through to SetMBB
8813 MBB = LoopMBB;
8814 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8815 .addReg(OrigOldVal).addMBB(StartMBB)
8816 .addReg(RetryOldVal).addMBB(SetMBB);
8817 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
8818 .addReg(OrigSwapVal).addMBB(StartMBB)
8819 .addReg(RetrySwapVal).addMBB(SetMBB);
8820 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
8821 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
8822 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
8823 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
8824 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
8825 .addReg(OldValRot);
8826 BuildMI(MBB, DL, TII->get(SystemZ::CR))
8827 .addReg(Dest).addReg(CmpVal);
8828 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8831 MBB->addSuccessor(DoneMBB);
8832 MBB->addSuccessor(SetMBB);
8833
8834 // SetMBB:
8835 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
8836 // ^^ Rotate the new field to its proper position.
8837 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
8838 // JNE LoopMBB
8839 // # fall through to ExitMBB
8840 MBB = SetMBB;
8841 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
8842 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
8843 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
8844 .addReg(OldVal)
8845 .addReg(StoreVal)
8846 .add(Base)
8847 .addImm(Disp);
8848 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8850 MBB->addSuccessor(LoopMBB);
8851 MBB->addSuccessor(DoneMBB);
8852
8853 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
8854 // to the block after the loop. At this point, CC may have been defined
8855 // either by the CR in LoopMBB or by the CS in SetMBB.
8856 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
8857 DoneMBB->addLiveIn(SystemZ::CC);
8858
8859 MI.eraseFromParent();
8860 return DoneMBB;
8861}
8862
8863// Emit a move from two GR64s to a GR128.
8865SystemZTargetLowering::emitPair128(MachineInstr &MI,
8866 MachineBasicBlock *MBB) const {
8867 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8868 const DebugLoc &DL = MI.getDebugLoc();
8869
8870 Register Dest = MI.getOperand(0).getReg();
8871 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
8872 .add(MI.getOperand(1))
8873 .addImm(SystemZ::subreg_h64)
8874 .add(MI.getOperand(2))
8875 .addImm(SystemZ::subreg_l64);
8876 MI.eraseFromParent();
8877 return MBB;
8878}
8879
8880// Emit an extension from a GR64 to a GR128. ClearEven is true
8881// if the high register of the GR128 value must be cleared or false if
8882// it's "don't care".
8883MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
8885 bool ClearEven) const {
8886 MachineFunction &MF = *MBB->getParent();
8887 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8889 DebugLoc DL = MI.getDebugLoc();
8890
8891 Register Dest = MI.getOperand(0).getReg();
8892 Register Src = MI.getOperand(1).getReg();
8893 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8894
8895 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
8896 if (ClearEven) {
8897 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8898 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8899
8900 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
8901 .addImm(0);
8902 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
8903 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
8904 In128 = NewIn128;
8905 }
8906 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8907 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
8908
8909 MI.eraseFromParent();
8910 return MBB;
8911}
8912
8914SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
8916 unsigned Opcode, bool IsMemset) const {
8917 MachineFunction &MF = *MBB->getParent();
8918 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8920 DebugLoc DL = MI.getDebugLoc();
8921
8922 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
8923 uint64_t DestDisp = MI.getOperand(1).getImm();
8924 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
8925 uint64_t SrcDisp;
8926
8927 // Fold the displacement Disp if it is out of range.
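 // For example, a displacement of 4096 no longer fits the unsigned 12-bit
 // field, so an LA/LAY materializes Base + Disp into a new register and the
 // displacement is reset to 0.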
8928 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
8929 if (!isUInt<12>(Disp)) {
8930 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8931 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
8932 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
8933 .add(Base).addImm(Disp).addReg(0);
8934 Base = MachineOperand::CreateReg(Reg, false);
8935 Disp = 0;
8936 }
8937 };
8938
8939 if (!IsMemset) {
8940 SrcBase = earlyUseOperand(MI.getOperand(2));
8941 SrcDisp = MI.getOperand(3).getImm();
8942 } else {
8943 SrcBase = DestBase;
8944 SrcDisp = DestDisp++;
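 // For a memset, the source overlaps the destination shifted by one byte: the
 // MVC emitted in insertMemMemOp then copies each byte just written to the
 // next position, propagating the byte stored by the leading MVI/STC.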
8945 foldDisplIfNeeded(DestBase, DestDisp);
8946 }
8947
8948 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
8949 bool IsImmForm = LengthMO.isImm();
8950 bool IsRegForm = !IsImmForm;
8951
8952 // Build and insert one Opcode of Length, with special treatment for memset.
8953 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
8955 MachineOperand DBase, uint64_t DDisp,
8957 unsigned Length) -> void {
8958 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
8959 if (IsMemset) {
8960 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
8961 if (ByteMO.isImm())
8962 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
8963 .add(SBase).addImm(SDisp).add(ByteMO);
8964 else
8965 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
8966 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
8967 if (--Length == 0)
8968 return;
8969 }
8970 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
8971 .add(DBase).addImm(DDisp).addImm(Length)
8972 .add(SBase).addImm(SDisp)
8973 .setMemRefs(MI.memoperands());
8974 };
8975
8976 bool NeedsLoop = false;
8977 uint64_t ImmLength = 0;
8978 Register LenAdjReg = SystemZ::NoRegister;
8979 if (IsImmForm) {
8980 ImmLength = LengthMO.getImm();
8981 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
8982 if (ImmLength == 0) {
8983 MI.eraseFromParent();
8984 return MBB;
8985 }
8986 if (Opcode == SystemZ::CLC) {
8987 if (ImmLength > 3 * 256)
8988 // A two-CLC sequence is a clear win over a loop, not least because
8989 // it needs only one branch. A three-CLC sequence needs the same
8990 // number of branches as a loop (i.e. 2), but is shorter. That
8991 // brings us to lengths greater than 768 bytes. It seems relatively
8992 // likely that a difference will be found within the first 768 bytes,
8993 // so we just optimize for the smallest number of branch
8994 // instructions, in order to avoid polluting the prediction buffer
8995 // too much.
8996 NeedsLoop = true;
8997 } else if (ImmLength > 6 * 256)
8998 // The heuristic we use is to prefer loops for anything that would
8999 // require 7 or more MVCs. With these kinds of sizes there isn't much
9000 // to choose between straight-line code and looping code, since the
9001 // time will be dominated by the MVCs themselves.
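 // For example, a 2048-byte MVC goes through the loop below, whereas a
 // 1024-byte MVC is left to the straight-line expansion at the end of this
 // function (four consecutive MVCs).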
9002 NeedsLoop = true;
9003 } else {
9004 NeedsLoop = true;
9005 LenAdjReg = LengthMO.getReg();
9006 }
9007
9008 // When generating more than one CLC, all but the last will need to
9009 // branch to the end when a difference is found.
9010 MachineBasicBlock *EndMBB =
9011 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
9013 : nullptr);
9014
9015 if (NeedsLoop) {
9016 Register StartCountReg =
9017 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
9018 if (IsImmForm) {
9019 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
9020 ImmLength &= 255;
9021 } else {
9022 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
9023 .addReg(LenAdjReg)
9024 .addReg(0)
9025 .addImm(8);
9026 }
9027
9028 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
9029 auto loadZeroAddress = [&]() -> MachineOperand {
9030 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9031 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
9032 return MachineOperand::CreateReg(Reg, false);
9033 };
9034 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
9035 DestBase = loadZeroAddress();
9036 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
9037 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
9038
9039 MachineBasicBlock *StartMBB = nullptr;
9040 MachineBasicBlock *LoopMBB = nullptr;
9041 MachineBasicBlock *NextMBB = nullptr;
9042 MachineBasicBlock *DoneMBB = nullptr;
9043 MachineBasicBlock *AllDoneMBB = nullptr;
9044
9045 Register StartSrcReg = forceReg(MI, SrcBase, TII);
9046 Register StartDestReg =
9047 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
9048
9049 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
9050 Register ThisSrcReg = MRI.createVirtualRegister(RC);
9051 Register ThisDestReg =
9052 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
9053 Register NextSrcReg = MRI.createVirtualRegister(RC);
9054 Register NextDestReg =
9055 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
9056 RC = &SystemZ::GR64BitRegClass;
9057 Register ThisCountReg = MRI.createVirtualRegister(RC);
9058 Register NextCountReg = MRI.createVirtualRegister(RC);
9059
9060 if (IsRegForm) {
9061 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9062 StartMBB = SystemZ::emitBlockAfter(MBB);
9063 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9064 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9065 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
9066
9067 // MBB:
9068 // # Jump to AllDoneMBB if LenAdjReg encodes a length of 0, or fall thru to StartMBB.
9069 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9070 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
9071 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9073 .addMBB(AllDoneMBB);
9074 MBB->addSuccessor(AllDoneMBB);
9075 if (!IsMemset)
9076 MBB->addSuccessor(StartMBB);
9077 else {
9078 // MemsetOneCheckMBB:
9079 // # Jump to MemsetOneMBB for a memset of length 1, or
9080 // # fall thru to StartMBB.
9081 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
9082 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
9083 MBB->addSuccessor(MemsetOneCheckMBB);
9084 MBB = MemsetOneCheckMBB;
9085 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9086 .addReg(LenAdjReg).addImm(-1);
9087 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9089 .addMBB(MemsetOneMBB);
9090 MBB->addSuccessor(MemsetOneMBB, {10, 100});
9091 MBB->addSuccessor(StartMBB, {90, 100});
9092
9093 // MemsetOneMBB:
9094 // # Jump back to AllDoneMBB after a single MVI or STC.
9095 MBB = MemsetOneMBB;
9096 insertMemMemOp(MBB, MBB->end(),
9097 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
9098 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
9099 1);
9100 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
9101 MBB->addSuccessor(AllDoneMBB);
9102 }
9103
9104 // StartMBB:
9105 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
9106 MBB = StartMBB;
9107 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9108 .addReg(StartCountReg).addImm(0);
9109 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9111 .addMBB(DoneMBB);
9112 MBB->addSuccessor(DoneMBB);
9113 MBB->addSuccessor(LoopMBB);
9114 }
9115 else {
9116 StartMBB = MBB;
9117 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9118 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9119 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9120
9121 // StartMBB:
9122 // # fall through to LoopMBB
9123 MBB->addSuccessor(LoopMBB);
9124
9125 DestBase = MachineOperand::CreateReg(NextDestReg, false);
9126 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
9127 if (EndMBB && !ImmLength)
9128 // If the loop handled the whole CLC range, DoneMBB will be empty with
9129 // CC live-through into EndMBB, so add it as live-in.
9130 DoneMBB->addLiveIn(SystemZ::CC);
9131 }
9132
9133 // LoopMBB:
9134 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
9135 // [ %NextDestReg, NextMBB ]
9136 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
9137 // [ %NextSrcReg, NextMBB ]
9138 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
9139 // [ %NextCountReg, NextMBB ]
9140 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
9141 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
9142 // ( JLH EndMBB )
9143 //
9144 // The prefetch is used only for MVC. The JLH is used only for CLC.
9145 MBB = LoopMBB;
9146 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
9147 .addReg(StartDestReg).addMBB(StartMBB)
9148 .addReg(NextDestReg).addMBB(NextMBB);
9149 if (!HaveSingleBase)
9150 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
9151 .addReg(StartSrcReg).addMBB(StartMBB)
9152 .addReg(NextSrcReg).addMBB(NextMBB);
9153 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
9154 .addReg(StartCountReg).addMBB(StartMBB)
9155 .addReg(NextCountReg).addMBB(NextMBB);
9156 if (Opcode == SystemZ::MVC)
9157 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
9159 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
9160 insertMemMemOp(MBB, MBB->end(),
9161 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
9162 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
9163 if (EndMBB) {
9164 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9166 .addMBB(EndMBB);
9167 MBB->addSuccessor(EndMBB);
9168 MBB->addSuccessor(NextMBB);
9169 }
9170
9171 // NextMBB:
9172 // %NextDestReg = LA 256(%ThisDestReg)
9173 // %NextSrcReg = LA 256(%ThisSrcReg)
9174 // %NextCountReg = AGHI %ThisCountReg, -1
9175 // CGHI %NextCountReg, 0
9176 // JLH LoopMBB
9177 // # fall through to DoneMBB
9178 //
9179 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
9180 MBB = NextMBB;
9181 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
9182 .addReg(ThisDestReg).addImm(256).addReg(0);
9183 if (!HaveSingleBase)
9184 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
9185 .addReg(ThisSrcReg).addImm(256).addReg(0);
9186 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
9187 .addReg(ThisCountReg).addImm(-1);
9188 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9189 .addReg(NextCountReg).addImm(0);
9190 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9192 .addMBB(LoopMBB);
9193 MBB->addSuccessor(LoopMBB);
9194 MBB->addSuccessor(DoneMBB);
9195
9196 MBB = DoneMBB;
9197 if (IsRegForm) {
9198 // DoneMBB:
9199 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
9200 // # Use EXecute Relative Long for the remainder of the bytes. The target
9201 // instruction of the EXRL will have a length field of 1 since 0 is an
9202 // illegal value. The number of bytes processed becomes (%LenAdjReg &
9203 // 0xff) + 1.
9204 // # Fall through to AllDoneMBB.
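 // For example, if LenAdjReg holds 699 (an original length of 700 bytes), the
 // loop above has copied 2 * 256 bytes and the EXRL handles the remaining
 // (699 & 0xff) + 1 == 188 bytes.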
9205 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9206 Register RemDestReg = HaveSingleBase ? RemSrcReg
9207 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9208 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
9209 .addReg(StartDestReg).addMBB(StartMBB)
9210 .addReg(NextDestReg).addMBB(NextMBB);
9211 if (!HaveSingleBase)
9212 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
9213 .addReg(StartSrcReg).addMBB(StartMBB)
9214 .addReg(NextSrcReg).addMBB(NextMBB);
9215 if (IsMemset)
9216 insertMemMemOp(MBB, MBB->end(),
9217 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
9218 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
9219 MachineInstrBuilder EXRL_MIB =
9220 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
9221 .addImm(Opcode)
9222 .addReg(LenAdjReg)
9223 .addReg(RemDestReg).addImm(DestDisp)
9224 .addReg(RemSrcReg).addImm(SrcDisp);
9225 MBB->addSuccessor(AllDoneMBB);
9226 MBB = AllDoneMBB;
9227 if (Opcode != SystemZ::MVC) {
9228 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
9229 if (EndMBB)
9230 MBB->addLiveIn(SystemZ::CC);
9231 }
9232 }
9234 }
9235
9236 // Handle any remaining bytes with straight-line code.
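 // For example, an immediate length of 600 with no loop above emits two
 // 256-byte operations followed by an 88-byte one.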
9237 while (ImmLength > 0) {
9238 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
9239 // The previous iteration might have created out-of-range displacements.
9240 // Apply them using LA/LAY if so.
9241 foldDisplIfNeeded(DestBase, DestDisp);
9242 foldDisplIfNeeded(SrcBase, SrcDisp);
9243 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
9244 DestDisp += ThisLength;
9245 SrcDisp += ThisLength;
9246 ImmLength -= ThisLength;
9247 // If there's another CLC to go, branch to the end if a difference
9248 // was found.
9249 if (EndMBB && ImmLength > 0) {
9251 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9253 .addMBB(EndMBB);
9254 MBB->addSuccessor(EndMBB);
9255 MBB->addSuccessor(NextMBB);
9256 MBB = NextMBB;
9257 }
9258 }
9259 if (EndMBB) {
9260 MBB->addSuccessor(EndMBB);
9261 MBB = EndMBB;
9262 MBB->addLiveIn(SystemZ::CC);
9263 }
9264
9265 MI.eraseFromParent();
9266 return MBB;
9267}
9268
9269// Decompose string pseudo-instruction MI into a loop that continually performs
9270// Opcode until CC != 3.
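// CC 3 means the instruction stopped after a CPU-determined number of bytes
// without completing the operation, so the loop below simply reissues it with
// the addresses updated by the previous iteration.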
9271MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
9272 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9273 MachineFunction &MF = *MBB->getParent();
9274 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9276 DebugLoc DL = MI.getDebugLoc();
9277
9278 uint64_t End1Reg = MI.getOperand(0).getReg();
9279 uint64_t Start1Reg = MI.getOperand(1).getReg();
9280 uint64_t Start2Reg = MI.getOperand(2).getReg();
9281 uint64_t CharReg = MI.getOperand(3).getReg();
9282
9283 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
9284 uint64_t This1Reg = MRI.createVirtualRegister(RC);
9285 uint64_t This2Reg = MRI.createVirtualRegister(RC);
9286 uint64_t End2Reg = MRI.createVirtualRegister(RC);
9287
9288 MachineBasicBlock *StartMBB = MBB;
9290 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9291
9292 // StartMBB:
9293 // # fall through to LoopMBB
9294 MBB->addSuccessor(LoopMBB);
9295
9296 // LoopMBB:
9297 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
9298 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
9299 // R0L = %CharReg
9300 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
9301 // JO LoopMBB
9302 // # fall through to DoneMBB
9303 //
9304 // The load of R0L can be hoisted by post-RA LICM.
9305 MBB = LoopMBB;
9306
9307 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
9308 .addReg(Start1Reg).addMBB(StartMBB)
9309 .addReg(End1Reg).addMBB(LoopMBB);
9310 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
9311 .addReg(Start2Reg).addMBB(StartMBB)
9312 .addReg(End2Reg).addMBB(LoopMBB);
9313 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
9314 BuildMI(MBB, DL, TII->get(Opcode))
9315 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
9316 .addReg(This1Reg).addReg(This2Reg);
9317 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9319 MBB->addSuccessor(LoopMBB);
9320 MBB->addSuccessor(DoneMBB);
9321
9322 DoneMBB->addLiveIn(SystemZ::CC);
9323
9324 MI.eraseFromParent();
9325 return DoneMBB;
9326}
9327
9328// Update TBEGIN instruction with final opcode and register clobbers.
9329MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
9330 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
9331 bool NoFloat) const {
9332 MachineFunction &MF = *MBB->getParent();
9333 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9334 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9335
9336 // Update opcode.
9337 MI.setDesc(TII->get(Opcode));
9338
9339 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
9340 // Make sure to add the corresponding GRSM bits if they are missing.
9341 uint64_t Control = MI.getOperand(2).getImm();
9342 static const unsigned GPRControlBit[16] = {
9343 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
9344 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
9345 };
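 // Adjacent entries share a value because the GRSM has one bit per even/odd
 // register pair: entry 15 maps to the r14/r15 pair (the stack pointer) and
 // entry 11 to the r10/r11 pair (the frame pointer).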
9346 Control |= GPRControlBit[15];
9347 if (TFI->hasFP(MF))
9348 Control |= GPRControlBit[11];
9349 MI.getOperand(2).setImm(Control);
9350
9351 // Add GPR clobbers.
9352 for (int I = 0; I < 16; I++) {
9353 if ((Control & GPRControlBit[I]) == 0) {
9354 unsigned Reg = SystemZMC::GR64Regs[I];
9355 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9356 }
9357 }
9358
9359 // Add FPR/VR clobbers.
9360 if (!NoFloat && (Control & 4) != 0) {
9361 if (Subtarget.hasVector()) {
9362 for (unsigned Reg : SystemZMC::VR128Regs) {
9363 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9364 }
9365 } else {
9366 for (unsigned Reg : SystemZMC::FP64Regs) {
9367 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9368 }
9369 }
9370 }
9371
9372 return MBB;
9373}
9374
9375MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
9376 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9377 MachineFunction &MF = *MBB->getParent();
9379 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9380 DebugLoc DL = MI.getDebugLoc();
9381
9382 Register SrcReg = MI.getOperand(0).getReg();
9383
9384 // Create new virtual register of the same class as source.
9385 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
9386 Register DstReg = MRI->createVirtualRegister(RC);
9387
9388 // Replace pseudo with a normal load-and-test that models the def as
9389 // well.
9390 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
9391 .addReg(SrcReg)
9392 .setMIFlags(MI.getFlags());
9393 MI.eraseFromParent();
9394
9395 return MBB;
9396}
9397
9398MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
9400 MachineFunction &MF = *MBB->getParent();
9402 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9403 DebugLoc DL = MI.getDebugLoc();
9404 const unsigned ProbeSize = getStackProbeSize(MF);
9405 Register DstReg = MI.getOperand(0).getReg();
9406 Register SizeReg = MI.getOperand(2).getReg();
9407
9408 MachineBasicBlock *StartMBB = MBB;
9410 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
9411 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
9412 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
9413 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
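 // For example, with a probe size of 4096 bytes, an allocation of 10000 bytes
 // runs LoopBodyMBB twice (two 4096-byte probes) and leaves 1808 bytes for
 // TailMBB to allocate and probe.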
9414
9417
9418 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9419 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9420
9421 // LoopTestMBB
9422 // BRC TailTestMBB
9423 // # fallthrough to LoopBodyMBB
9424 StartMBB->addSuccessor(LoopTestMBB);
9425 MBB = LoopTestMBB;
9426 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
9427 .addReg(SizeReg)
9428 .addMBB(StartMBB)
9429 .addReg(IncReg)
9430 .addMBB(LoopBodyMBB);
9431 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
9432 .addReg(PHIReg)
9433 .addImm(ProbeSize);
9434 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9436 .addMBB(TailTestMBB);
9437 MBB->addSuccessor(LoopBodyMBB);
9438 MBB->addSuccessor(TailTestMBB);
9439
9440 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
9441 // J LoopTestMBB
9442 MBB = LoopBodyMBB;
9443 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
9444 .addReg(PHIReg)
9445 .addImm(ProbeSize);
9446 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
9447 .addReg(SystemZ::R15D)
9448 .addImm(ProbeSize);
9449 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9450 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
9451 .setMemRefs(VolLdMMO);
9452 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
9453 MBB->addSuccessor(LoopTestMBB);
9454
9455 // TailTestMBB
9456 // BRC DoneMBB
9457 // # fallthrough to TailMBB
9458 MBB = TailTestMBB;
9459 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9460 .addReg(PHIReg)
9461 .addImm(0);
9462 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9464 .addMBB(DoneMBB);
9465 MBB->addSuccessor(TailMBB);
9466 MBB->addSuccessor(DoneMBB);
9467
9468 // TailMBB
9469 // # fallthrough to DoneMBB
9470 MBB = TailMBB;
9471 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
9472 .addReg(SystemZ::R15D)
9473 .addReg(PHIReg);
9474 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9475 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
9476 .setMemRefs(VolLdMMO);
9477 MBB->addSuccessor(DoneMBB);
9478
9479 // DoneMBB
9480 MBB = DoneMBB;
9481 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
9482 .addReg(SystemZ::R15D);
9483
9484 MI.eraseFromParent();
9485 return DoneMBB;
9486}
9487
9488SDValue SystemZTargetLowering::
9489getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
9491 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
9492 SDLoc DL(SP);
9493 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
9494 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
9495}
9496
9499 switch (MI.getOpcode()) {
9500 case SystemZ::ADJCALLSTACKDOWN:
9501 case SystemZ::ADJCALLSTACKUP:
9502 return emitAdjCallStack(MI, MBB);
9503
9504 case SystemZ::Select32:
9505 case SystemZ::Select64:
9506 case SystemZ::Select128:
9507 case SystemZ::SelectF32:
9508 case SystemZ::SelectF64:
9509 case SystemZ::SelectF128:
9510 case SystemZ::SelectVR32:
9511 case SystemZ::SelectVR64:
9512 case SystemZ::SelectVR128:
9513 return emitSelect(MI, MBB);
9514
9515 case SystemZ::CondStore8Mux:
9516 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
9517 case SystemZ::CondStore8MuxInv:
9518 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
9519 case SystemZ::CondStore16Mux:
9520 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
9521 case SystemZ::CondStore16MuxInv:
9522 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
9523 case SystemZ::CondStore32Mux:
9524 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
9525 case SystemZ::CondStore32MuxInv:
9526 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
9527 case SystemZ::CondStore8:
9528 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
9529 case SystemZ::CondStore8Inv:
9530 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
9531 case SystemZ::CondStore16:
9532 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
9533 case SystemZ::CondStore16Inv:
9534 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
9535 case SystemZ::CondStore32:
9536 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
9537 case SystemZ::CondStore32Inv:
9538 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
9539 case SystemZ::CondStore64:
9540 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
9541 case SystemZ::CondStore64Inv:
9542 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
9543 case SystemZ::CondStoreF32:
9544 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
9545 case SystemZ::CondStoreF32Inv:
9546 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
9547 case SystemZ::CondStoreF64:
9548 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
9549 case SystemZ::CondStoreF64Inv:
9550 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
9551
9552 case SystemZ::SCmp128Hi:
9553 return emitICmp128Hi(MI, MBB, false);
9554 case SystemZ::UCmp128Hi:
9555 return emitICmp128Hi(MI, MBB, true);
9556
9557 case SystemZ::PAIR128:
9558 return emitPair128(MI, MBB);
9559 case SystemZ::AEXT128:
9560 return emitExt128(MI, MBB, false);
9561 case SystemZ::ZEXT128:
9562 return emitExt128(MI, MBB, true);
9563
9564 case SystemZ::ATOMIC_SWAPW:
9565 return emitAtomicLoadBinary(MI, MBB, 0);
9566
9567 case SystemZ::ATOMIC_LOADW_AR:
9568 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
9569 case SystemZ::ATOMIC_LOADW_AFI:
9570 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
9571
9572 case SystemZ::ATOMIC_LOADW_SR:
9573 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
9574
9575 case SystemZ::ATOMIC_LOADW_NR:
9576 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
9577 case SystemZ::ATOMIC_LOADW_NILH:
9578 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
9579
9580 case SystemZ::ATOMIC_LOADW_OR:
9581 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
9582 case SystemZ::ATOMIC_LOADW_OILH:
9583 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
9584
9585 case SystemZ::ATOMIC_LOADW_XR:
9586 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
9587 case SystemZ::ATOMIC_LOADW_XILF:
9588 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
9589
9590 case SystemZ::ATOMIC_LOADW_NRi:
9591 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
9592 case SystemZ::ATOMIC_LOADW_NILHi:
9593 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
9594
9595 case SystemZ::ATOMIC_LOADW_MIN:
9596 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
9597 case SystemZ::ATOMIC_LOADW_MAX:
9598 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
9599 case SystemZ::ATOMIC_LOADW_UMIN:
9600 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
9601 case SystemZ::ATOMIC_LOADW_UMAX:
9602 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
9603
9604 case SystemZ::ATOMIC_CMP_SWAPW:
9605 return emitAtomicCmpSwapW(MI, MBB);
9606 case SystemZ::MVCImm:
9607 case SystemZ::MVCReg:
9608 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
9609 case SystemZ::NCImm:
9610 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
9611 case SystemZ::OCImm:
9612 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
9613 case SystemZ::XCImm:
9614 case SystemZ::XCReg:
9615 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
9616 case SystemZ::CLCImm:
9617 case SystemZ::CLCReg:
9618 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
9619 case SystemZ::MemsetImmImm:
9620 case SystemZ::MemsetImmReg:
9621 case SystemZ::MemsetRegImm:
9622 case SystemZ::MemsetRegReg:
9623 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
9624 case SystemZ::CLSTLoop:
9625 return emitStringWrapper(MI, MBB, SystemZ::CLST);
9626 case SystemZ::MVSTLoop:
9627 return emitStringWrapper(MI, MBB, SystemZ::MVST);
9628 case SystemZ::SRSTLoop:
9629 return emitStringWrapper(MI, MBB, SystemZ::SRST);
9630 case SystemZ::TBEGIN:
9631 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
9632 case SystemZ::TBEGIN_nofloat:
9633 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
9634 case SystemZ::TBEGINC:
9635 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
9636 case SystemZ::LTEBRCompare_Pseudo:
9637 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
9638 case SystemZ::LTDBRCompare_Pseudo:
9639 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
9640 case SystemZ::LTXBRCompare_Pseudo:
9641 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
9642
9643 case SystemZ::PROBED_ALLOCA:
9644 return emitProbedAlloca(MI, MBB);
9645
9646 case TargetOpcode::STACKMAP:
9647 case TargetOpcode::PATCHPOINT:
9648 return emitPatchPoint(MI, MBB);
9649
9650 default:
9651 llvm_unreachable("Unexpected instr type to insert");
9652 }
9653}
9654
9655// This is only used by the isel schedulers, and is needed only to prevent
9656 // the compiler from crashing when list-ilp is used.
9657const TargetRegisterClass *
9658SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
9659 if (VT == MVT::Untyped)
9660 return &SystemZ::ADDR128BitRegClass;
9661 return TargetLowering::getRepRegClassFor(VT);
9662}
9663
9664SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
9665 SelectionDAG &DAG) const {
9666 SDLoc dl(Op);
9667 /*
9668 The rounding method is in FPC Byte 3 bits 6-7, and has the following
9669 settings:
9670 00 Round to nearest
9671 01 Round to 0
9672 10 Round to +inf
9673 11 Round to -inf
9674
9675 FLT_ROUNDS, on the other hand, expects the following:
9676 -1 Undefined
9677 0 Round to 0
9678 1 Round to nearest
9679 2 Round to +inf
9680 3 Round to -inf
9681 */
9682
9683 // Save FPC to register.
9684 SDValue Chain = Op.getOperand(0);
9685 SDValue EFPC(
9686 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
9687 Chain = EFPC.getValue(1);
9688
9689 // Transform as necessary
9690 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
9691 DAG.getConstant(3, dl, MVT::i32));
9692 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
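 // For example: FPC 00 -> (0 ^ 0) ^ 1 = 1 (nearest), 01 -> (1 ^ 0) ^ 1 = 0
 // (to zero), 10 -> (2 ^ 1) ^ 1 = 2 (+inf), 11 -> (3 ^ 1) ^ 1 = 3 (-inf).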
9693 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
9694 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
9695 DAG.getConstant(1, dl, MVT::i32)));
9696
9697 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
9698 DAG.getConstant(1, dl, MVT::i32));
9699 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
9700
9701 return DAG.getMergeValues({RetVal, Chain}, dl);
9702}
9703
9704SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
9705 SelectionDAG &DAG) const {
9706 EVT VT = Op.getValueType();
9707 Op = Op.getOperand(0);
9708 EVT OpVT = Op.getValueType();
9709
9710 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
9711
9712 SDLoc DL(Op);
9713
9714 // load a 0 vector for the third operand of VSUM.
9715 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
9716
9717 // execute VSUM.
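 // For v16i8 and v8i16 operands, the first VSUM reduces the elements to word
 // sums in a v4i32, which then falls through to the quadword VSUM below.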
9718 switch (OpVT.getScalarSizeInBits()) {
9719 case 8:
9720 case 16:
9721 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
9722 [[fallthrough]];
9723 case 32:
9724 case 64:
9725 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
9726 DAG.getBitcast(Op.getValueType(), Zero));
9727 break;
9728 case 128:
9729 break; // VSUM over v1i128 should not happen and would be a noop
9730 default:
9731 llvm_unreachable("Unexpected scalar size.");
9732 }
9733 // Cast to original vector type, retrieve last element.
9734 return DAG.getNode(
9735 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
9736 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
9737}
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1237
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:838
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:831
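A minimal sketch (hypothetical values, not code from this file) of how the APInt operations listed above compose:
  APInt V(32, 0xF0u);                        // 32-bit value 0x000000F0
  V.setBit(0);                               // now 0x000000F1
  APInt Wide = V.zext(64);                   // zero-extend to 64 bits
  Wide.lshrInPlace(4);                       // logical right shift in place -> 0xF
  APInt Mask = APInt::getBitsSet(64, 0, 8);  // bits [0,8) set
  bool InLowByte = Wide.isSubsetOf(Mask);    // true: only bits below 8 are set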
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
@ Add
*p = old + v
Definition: Instructions.h:712
@ Or
*p = old | v
Definition: Instructions.h:720
@ Sub
*p = old - v
Definition: Instructions.h:714
@ And
*p = old & v
Definition: Instructions.h:716
@ Xor
*p = old ^ v
Definition: Instructions.h:722
BinOp getOperation() const
Definition: Instructions.h:787
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:391
The address of a basic block.
Definition: Constants.h:890
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:745
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:757
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:274
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:719
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:582
bool hasPrivateLinkage() const
Definition: GlobalValue.h:527
bool hasInternalLinkage() const
Definition: GlobalValue.h:526
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:174
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
unsigned getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setMaxCallFrameSize(unsigned S)
MachineFunctionProperties & reset(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
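A hedged sketch of the chained-builder pattern implied by the MachineInstrBuilder methods above; MBB, InsertPt, DL, TII, DstReg and SrcReg are assumed to be in scope, and the COPY opcode is only illustrative:
  BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::COPY), DstReg)
      .addReg(SrcReg);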
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getMachineOpcode() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:227
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:734
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:492
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:744
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:840
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:486
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:673
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:874
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:487
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:785
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
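A short illustrative use of getConstant and getNode (assumes a SelectionDAG &DAG, an SDLoc DL and an i64 SDValue Op are in scope; not code from this file):
  SDValue One = DAG.getConstant(1, DL, MVT::i64);
  SDValue Sum = DAG.getNode(ISD::ADD, DL, MVT::i64, Op, One);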
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:688
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:780
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:481
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:811
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:857
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:499
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:751
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:568
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void reserve(size_type N)
Definition: SmallVector.h:676
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:290
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:455
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:250
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:669
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
iterator end() const
Definition: StringRef.h:113
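A small hedged example (hypothetical input, not from this file) of the StringRef helpers above, as one might decode a register name:
  StringRef S = "%r15";
  unsigned Num = 0;
  if (S.starts_with("%r") && !S.slice(2, S.size()).getAsInteger(10, Num)) {
    // Num == 15; getAsInteger returns false on success.
  }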
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
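A minimal StringSwitch sketch (the strings and values are illustrative; Name is an assumed StringRef):
  ISD::CondCode CC = StringSwitch<ISD::CondCode>(Name)
                         .Case("eq", ISD::SETEQ)
                         .Case("ne", ISD::SETNE)
                         .Default(ISD::SETCC_INVALID);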
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
A SystemZ-specific class detailing special-use registers particular to calling conventions.
A SystemZ-specific constant pool value.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const
const TargetFrameLowering * getFrameLowering() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
const SystemZRegisterInfo * getRegisterInfo() const override
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
bool useSoftFloat() const override
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal add immediate, that is, the target has add instruction...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers, particular to z/OS when in 64-bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
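A hedged sketch of how a TargetLowering subclass constructor typically drives the configuration hooks above; the specific actions are placeholders, not the choices made in this file:
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i32, Legal);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setBooleanContents(ZeroOrOneBooleanContent);
  setMaxAtomicSizeInBitsSupported(128);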
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:778
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1167
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1163
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:751
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1310
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:742
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1196
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1312
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1282
@ STRICT_FCEIL
Definition: ISDOpcodes.h:440
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1313
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1072
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:811
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
Definition: ISDOpcodes.h:1295
@ STRICT_FMINIMUM
Definition: ISDOpcodes.h:450
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:818
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:716
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1269
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1274
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:848
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1308
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:931
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1309
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1451
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:802
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:464
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1262
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1029
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:958
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1118
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1311
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1097
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:521
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:755
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1278
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:641
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1192
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:444
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1405
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined; 0 Round to 0; 1 Round to nearest, ties to even; 2 Round to ...
Definition: ISDOpcodes.h:908
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:733
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1306
@ STRICT_FMAXIMUM
Definition: ISDOpcodes.h:449
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:438
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:808
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:439
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:770
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1314
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1006
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1082
@ ConstantPool
Definition: ISDOpcodes.h:82
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:837
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:826
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:916
@ STRICT_FROUND
Definition: ISDOpcodes.h:442
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:764
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:463
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:441
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1304
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:457
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:479
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:456
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1025
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1305
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:864
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1223
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:484
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1249
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1303
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:897
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:859
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:437
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:814
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1111
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:791
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:507
@ AssertZext
Definition: ISDOpcodes.h:62
@ STRICT_FRINT
Definition: ISDOpcodes.h:436
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1361
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
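Two concrete values of the condition-code helpers above, stated for illustration:
  ISD::CondCode Inv  = ISD::getSetCCInverse(ISD::SETLT, MVT::i32);   // ISD::SETGE
  ISD::CondCode Swap = ISD::getSetCCSwappedOperands(ISD::SETLT);     // ISD::SETGT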
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1574
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1554
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition: SystemZ.h:40
static bool isImmHH(uint64_t Val)
Definition: SystemZ.h:176
const unsigned CCMASK_TEND
Definition: SystemZ.h:97
const unsigned CCMASK_CS_EQ
Definition: SystemZ.h:67
const unsigned CCMASK_TBEGIN
Definition: SystemZ.h:92
const unsigned CCMASK_0
Definition: SystemZ.h:27
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition: SystemZ.h:82
const unsigned CCMASK_LOGICAL_CARRY
Definition: SystemZ.h:60
const unsigned TDCMASK_NORMAL_MINUS
Definition: SystemZ.h:122
const unsigned CCMASK_TDC
Definition: SystemZ.h:109
const unsigned CCMASK_FCMP
Definition: SystemZ.h:48
const unsigned CCMASK_TM_SOME_0
Definition: SystemZ.h:81
static bool isImmHL(uint64_t Val)
Definition: SystemZ.h:171
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition: SystemZ.h:124
const unsigned PFD_READ
Definition: SystemZ.h:115
const unsigned CCMASK_1
Definition: SystemZ.h:28
const unsigned TDCMASK_NORMAL_PLUS
Definition: SystemZ.h:121
const unsigned PFD_WRITE
Definition: SystemZ.h:116
const unsigned CCMASK_CMP_GT
Definition: SystemZ.h:37
const unsigned TDCMASK_QNAN_MINUS
Definition: SystemZ.h:128
const unsigned CCMASK_CS
Definition: SystemZ.h:69
const unsigned CCMASK_ANY
Definition: SystemZ.h:31
const unsigned CCMASK_ARITH
Definition: SystemZ.h:55
const unsigned CCMASK_TM_MIXED_MSB_0
Definition: SystemZ.h:78
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition: SystemZ.h:123
static bool isImmLL(uint64_t Val)
Definition: SystemZ.h:161
const unsigned VectorBits
Definition: SystemZ.h:154
static bool isImmLH(uint64_t Val)
Definition: SystemZ.h:166
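Taken together, the isImmLL/isImmLH/isImmHL/isImmHH predicates listed above test whether a 64-bit value occupies exactly one 16-bit halfword slice. Illustrative values, assuming LL through HH name the halfwords from least to most significant:

// SystemZ::isImmLL(0x0000000000001234ULL)  -> true  (only bits 0-15 set)
// SystemZ::isImmLH(0x0000000012340000ULL)  -> true  (only bits 16-31 set)
// SystemZ::isImmHL(0x0000123400000000ULL)  -> true  (only bits 32-47 set)
// SystemZ::isImmHH(0x1234000000000000ULL)  -> true  (only bits 48-63 set)
// SystemZ::isImmLL(0x0000000000010000ULL)  -> false (bit 16 lies outside the LL halfword)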
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition: SystemZ.h:125
unsigned reverseCCMask(unsigned CCMask)
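A sketch of how reverseCCMask is typically applied: when the operands of a comparison are swapped, the CCMASK_CMP_LT and CCMASK_CMP_GT bits must trade places while the EQ and UO bits stay put (illustrative values only):

unsigned CCMask = SystemZ::CCMASK_CMP_LT | SystemZ::CCMASK_CMP_EQ;   // "X <= Y"
unsigned Rev    = SystemZ::reverseCCMask(CCMask);
// Rev == (SystemZ::CCMASK_CMP_GT | SystemZ::CCMASK_CMP_EQ), i.e. "Y >= X"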
const unsigned CCMASK_TM_ALL_0
Definition: SystemZ.h:77
const unsigned IPM_CC
Definition: SystemZ.h:112
const unsigned CCMASK_CMP_LE
Definition: SystemZ.h:39
const unsigned CCMASK_CMP_O
Definition: SystemZ.h:44
const unsigned CCMASK_CMP_EQ
Definition: SystemZ.h:35
const unsigned VectorBytes
Definition: SystemZ.h:158
const unsigned TDCMASK_INFINITY_MINUS
Definition: SystemZ.h:126
const unsigned CCMASK_ICMP
Definition: SystemZ.h:47
const unsigned CCMASK_VCMP_ALL
Definition: SystemZ.h:101
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition: SystemZ.h:104
const unsigned CCMASK_TM_MIXED_MSB_1
Definition: SystemZ.h:79
const unsigned CCMASK_TM_MSB_0
Definition: SystemZ.h:83
const unsigned CCMASK_ARITH_OVERFLOW
Definition: SystemZ.h:54
const unsigned CCMASK_CS_NE
Definition: SystemZ.h:68
const unsigned TDCMASK_SNAN_PLUS
Definition: SystemZ.h:129
const unsigned CCMASK_TM
Definition: SystemZ.h:85
const unsigned CCMASK_3
Definition: SystemZ.h:30
const unsigned CCMASK_CMP_LT
Definition: SystemZ.h:36
const unsigned CCMASK_CMP_NE
Definition: SystemZ.h:38
const unsigned TDCMASK_ZERO_PLUS
Definition: SystemZ.h:119
const unsigned TDCMASK_QNAN_PLUS
Definition: SystemZ.h:127
const unsigned TDCMASK_ZERO_MINUS
Definition: SystemZ.h:120
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition: SystemZ.h:80
const unsigned CCMASK_LOGICAL_BORROW
Definition: SystemZ.h:62
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition: SystemZ.h:43
const unsigned CCMASK_LOGICAL
Definition: SystemZ.h:64
const unsigned CCMASK_TM_MSB_1
Definition: SystemZ.h:84
const unsigned TDCMASK_SNAN_MINUS
Definition: SystemZ.h:130
@ GeneralDynamic
Definition: CodeGen.h:46
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:353
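For example:

unsigned A = Log2_32_Ceil(8);   // 3: 8 is already 2^3
unsigned B = Log2_32_Ceil(9);   // 4: the next power of two is 16
unsigned C = Log2_32_Ceil(0);   // 32, by definition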
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
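A hedged sketch of the builder pattern this enables; MBB, MI, DL, TII and DestReg are placeholders, not names from this file:

// Emit "load halfword immediate 0" into DestReg just before MI.
BuildMI(MBB, MI, DL, TII->get(SystemZ::LHI), DestReg)
    .addImm(0);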
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert 'Bytes' to a hex string and output to 'OS'.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
int countl_zero(T Val)
Count the number of 0s from the most significant bit to the least significant, stopping at the first 1.
Definition: bit.h:281
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
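Quick reference values for the bit-manipulation helpers above (bit_ceil, countr_zero and countl_zero live in llvm/ADT/bit.h, isPowerOf2_32 in llvm/Support/MathExtras.h):

unsigned Ceil = llvm::bit_ceil(5u);        // 8: the smallest power of two >= 5
int TZ        = llvm::countr_zero(40u);    // 3: 40 == 0b101000
int LZ        = llvm::countl_zero(1u);     // 31 for a 32-bit operand
bool Pow2     = llvm::isPowerOf2_32(64);   // true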
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ Mul
Product of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:573
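For instance, widening a narrow signed field:

int64_t A = SignExtend64<16>(0xFFFFULL);   // -1
int64_t B = SignExtend64<16>(0x7FFFULL);   // 32767
int64_t C = SignExtend64(0x80ULL, 8);      // -128, using the runtime-width overload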
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:175
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:161
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:300
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:169
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:134
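A hypothetical sequence exercising the KnownBits operations above (llvm/Support/KnownBits.h):

KnownBits Known(8);                              // 8-bit value, nothing known yet
Known.Zero.setHighBits(4);                       // top nibble known to be zero
KnownBits Wide   = Known.zext(16);               // zero extension: new high bits known zero
KnownBits Signed = Known.sext(16);               // sign extension: replicates the sign bit's state
KnownBits Common = Wide.intersectWith(Signed);   // facts true in both views
APInt Max        = Known.getMaxValue();          // 0x0F: largest value consistent with the facts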
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
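Sketch of how these factories typically feed a DAG memory node; DAG, Chain, DL, Value, FI, PtrVT and MF are placeholders:

// Store Value to a fixed stack slot; the MachinePointerInfo lets later passes
// reason about aliasing for that frame index.
SDValue Slot  = DAG.getFrameIndex(FI, PtrVT);
SDValue Store = DAG.getStore(Chain, DL, Value, Slot,
                             MachinePointerInfo::getFixedStack(MF, FI));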
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
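These setters are designed to chain; a typical call-lowering sequence inside a TargetLowering subclass looks roughly like the following (a hedged sketch; DAG, DL, Chain, RetTy, Callee, Args and IsSigned are placeholders):

TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL)
    .setChain(Chain)
    .setCallee(CallingConv::C, RetTy, Callee, std::move(Args))
    .setSExtResult(IsSigned)
    .setZExtResult(!IsSigned);
std::pair<SDValue, SDValue> Result = LowerCallTo(CLI);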