LLVM 20.0.0git
SystemZISelLowering.cpp
1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
23#include "llvm/IR/GlobalAlias.h"
25#include "llvm/IR/Intrinsics.h"
26#include "llvm/IR/IntrinsicsS390.h"
30#include <cctype>
31#include <optional>
32
33using namespace llvm;
34
35#define DEBUG_TYPE "systemz-lower"
36
37namespace {
38// Represents information about a comparison.
39struct Comparison {
40 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
41 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
42 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
43
44 // The operands to the comparison.
45 SDValue Op0, Op1;
46
47 // Chain if this is a strict floating-point comparison.
48 SDValue Chain;
49
50 // The opcode that should be used to compare Op0 and Op1.
51 unsigned Opcode;
52
53 // A SystemZICMP value. Only used for integer comparisons.
54 unsigned ICmpType;
55
56 // The mask of CC values that Opcode can produce.
57 unsigned CCValid;
58
59 // The mask of CC values for which the original condition is true.
60 unsigned CCMask;
61};
62} // end anonymous namespace
63
64// Classify VT as either 32 or 64 bit.
65static bool is32Bit(EVT VT) {
66 switch (VT.getSimpleVT().SimpleTy) {
67 case MVT::i32:
68 return true;
69 case MVT::i64:
70 return false;
71 default:
72 llvm_unreachable("Unsupported type");
73 }
74}
75
76// Return a version of MachineOperand that can be safely used before the
77// final use.
78static MachineOperand earlyUseOperand(MachineOperand Op) {
79  if (Op.isReg())
80 Op.setIsKill(false);
81 return Op;
82}
83
84SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
85                                             const SystemZSubtarget &STI)
86 : TargetLowering(TM), Subtarget(STI) {
87 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
88
89 auto *Regs = STI.getSpecialRegisters();
90
91 // Set up the register classes.
92 if (Subtarget.hasHighWord())
93 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
94 else
95 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
96 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
97 if (!useSoftFloat()) {
98 if (Subtarget.hasVector()) {
99 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
100 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
101 } else {
102 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
103 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
104 }
105 if (Subtarget.hasVectorEnhancements1())
106 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
107 else
108 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
109
110 if (Subtarget.hasVector()) {
111 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
112 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
113 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
114 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
115 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
116 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
117 }
118
119 if (Subtarget.hasVector())
120 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
121 }
122
123 // Compute derived properties from the register classes
125
126 // Set up special registers.
127 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
128
129 // TODO: It may be better to default to latency-oriented scheduling, however
130 // LLVM's current latency-oriented scheduler can't handle physreg definitions
131 // such as SystemZ has with CC, so set this to the register-pressure
132 // scheduler, because it can.
134
137
139
140 // Instructions are strings of 2-byte aligned 2-byte values.
142 // For performance reasons we prefer 16-byte alignment.
144
145 // Handle operations that are handled in a similar way for all types.
146 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
147 I <= MVT::LAST_FP_VALUETYPE;
148 ++I) {
150 if (isTypeLegal(VT)) {
151 // Lower SET_CC into an IPM-based sequence.
155
156 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
158
159 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
162 }
163 }
164
165 // Expand jump table branches as address arithmetic followed by an
166 // indirect jump.
168
169 // Expand BRCOND into a BR_CC (see above).
171
172 // Handle integer types except i128.
173 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
174 I <= MVT::LAST_INTEGER_VALUETYPE;
175 ++I) {
177 if (isTypeLegal(VT) && VT != MVT::i128) {
179
180 // Expand individual DIV and REMs into DIVREMs.
187
188 // Support addition/subtraction with overflow.
191
192 // Support addition/subtraction with carry.
195
196 // Support carry in as value rather than glue.
199
200 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
201 // available, or if the operand is constant.
203
204 // Use POPCNT on z196 and above.
205 if (Subtarget.hasPopulationCount())
207 else
209
210 // No special instructions for these.
213
214 // Use *MUL_LOHI where possible instead of MULH*.
219
220 // Only z196 and above have native support for conversions to unsigned.
221 // On z10, promoting to i64 doesn't generate an inexact condition for
222 // values that are outside the i32 range but in the i64 range, so use
223 // the default expansion.
224 if (!Subtarget.hasFPExtension())
226
227 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
228 // default to Expand, so need to be modified to Legal where appropriate.
230 if (Subtarget.hasFPExtension())
232
233 // And similarly for STRICT_[SU]INT_TO_FP.
235 if (Subtarget.hasFPExtension())
237 }
238 }
239
240 // Handle i128 if legal.
241 if (isTypeLegal(MVT::i128)) {
242 // No special instructions for these.
258
259 // Support addition/subtraction with carry.
264
265 // Use VPOPCT and add up partial results.
267
268 // We have to use libcalls for these.
277 }
278
279 // Type legalization will convert 8- and 16-bit atomic operations into
280 // forms that operate on i32s (but still keeping the original memory VT).
281 // Lower them into full i32 operations.
293
294  // Whether or not i128 is a legal type, we need to custom lower
295 // the atomic operations in order to exploit SystemZ instructions.
300
301 // Mark sign/zero extending atomic loads as legal, which will make
302 // DAGCombiner fold extensions into atomic loads if possible.
304 {MVT::i8, MVT::i16, MVT::i32}, Legal);
306 {MVT::i8, MVT::i16}, Legal);
308 MVT::i8, Legal);
309
310 // We can use the CC result of compare-and-swap to implement
311 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
315
317
318 // Traps are legal, as we will convert them to "j .+2".
319 setOperationAction(ISD::TRAP, MVT::Other, Legal);
320
321 // z10 has instructions for signed but not unsigned FP conversion.
322 // Handle unsigned 32-bit types as signed 64-bit types.
323 if (!Subtarget.hasFPExtension()) {
328 }
329
330 // We have native support for a 64-bit CTLZ, via FLOGR.
334
335 // On z15 we have native support for a 64-bit CTPOP.
336 if (Subtarget.hasMiscellaneousExtensions3()) {
339 }
340
341 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
343
344 // Expand 128 bit shifts without using a libcall.
348
349 // Also expand 256 bit shifts if i128 is a legal type.
350 if (isTypeLegal(MVT::i128)) {
354 }
355
356 // Handle bitcast from fp128 to i128.
357 if (!isTypeLegal(MVT::i128))
359
360 // We have native instructions for i8, i16 and i32 extensions, but not i1.
362 for (MVT VT : MVT::integer_valuetypes()) {
366 }
367
368 // Handle the various types of symbolic address.
374
375 // We need to handle dynamic allocations specially because of the
376 // 160-byte area at the bottom of the stack.
379
382
383 // Handle prefetches with PFD or PFDRL.
385
386 // Handle readcyclecounter with STCKF.
388
390 // Assume by default that all vector operations need to be expanded.
391 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
392 if (getOperationAction(Opcode, VT) == Legal)
393 setOperationAction(Opcode, VT, Expand);
394
395 // Likewise all truncating stores and extending loads.
396 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
397 setTruncStoreAction(VT, InnerVT, Expand);
400 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
401 }
402
403 if (isTypeLegal(VT)) {
404 // These operations are legal for anything that can be stored in a
405 // vector register, even if there is no native support for the format
406 // as such. In particular, we can do these for v4f32 even though there
407 // are no specific instructions for that format.
413
414 // Likewise, except that we need to replace the nodes with something
415 // more specific.
418 }
419 }
420
421 // Handle integer vector types.
423 if (isTypeLegal(VT)) {
424 // These operations have direct equivalents.
429 if (VT != MVT::v2i64)
435 if (Subtarget.hasVectorEnhancements1())
437 else
441
442 // Convert a GPR scalar to a vector by inserting it into element 0.
444
445 // Use a series of unpacks for extensions.
448
449 // Detect shifts/rotates by a scalar amount and convert them into
450 // V*_BY_SCALAR.
455
456 // Add ISD::VECREDUCE_ADD as custom in order to implement
457 // it with VZERO+VSUM
459
460 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
461 // and inverting the result as necessary.
463 }
464 }
465
466 if (Subtarget.hasVector()) {
467 // There should be no need to check for float types other than v2f64
468 // since <2 x f32> isn't a legal type.
477
486 }
487
488 if (Subtarget.hasVectorEnhancements2()) {
497
506 }
507
508 // Handle floating-point types.
509 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
510 I <= MVT::LAST_FP_VALUETYPE;
511 ++I) {
513 if (isTypeLegal(VT)) {
514 // We can use FI for FRINT.
516
517 // We can use the extended form of FI for other rounding operations.
518 if (Subtarget.hasFPExtension()) {
524 }
525
526 // No special instructions for these.
532
533 // Special treatment.
535
536 // Handle constrained floating-point operations.
546 if (Subtarget.hasFPExtension()) {
552 }
553 }
554 }
555
556 // Handle floating-point vector types.
557 if (Subtarget.hasVector()) {
558 // Scalar-to-vector conversion is just a subreg.
561
562 // Some insertions and extractions can be done directly but others
563 // need to go via integers.
568
569 // These operations have direct equivalents.
570 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
571 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
572 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
573 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
574 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
575 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
576 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
577 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
578 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
581 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
584
585 // Handle constrained floating-point operations.
598
603 if (Subtarget.hasVectorEnhancements1()) {
606 }
607 }
608
609 // The vector enhancements facility 1 has instructions for these.
610 if (Subtarget.hasVectorEnhancements1()) {
611 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
612 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
613 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
614 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
615 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
616 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
617 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
618 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
619 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
622 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
625
630
635
640
645
650
651 // Handle constrained floating-point operations.
664 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
665 MVT::v4f32, MVT::v2f64 }) {
670 }
671 }
672
673 // We only have fused f128 multiply-addition on vector registers.
674 if (!Subtarget.hasVectorEnhancements1()) {
677 }
678
679 // We don't have a copysign instruction on vector registers.
680 if (Subtarget.hasVectorEnhancements1())
682
683 // Needed so that we don't try to implement f128 constant loads using
684  // a load-and-extend of an f80 constant (in cases where the constant
685 // would fit in an f80).
686 for (MVT VT : MVT::fp_valuetypes())
687 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
688
689  // We don't have extending load instructions on vector registers.
690 if (Subtarget.hasVectorEnhancements1()) {
691 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
692 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
693 }
694
695 // Floating-point truncation and stores need to be done separately.
696 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
697 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
698 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
699
700 // We have 64-bit FPR<->GPR moves, but need special handling for
701 // 32-bit forms.
702 if (!Subtarget.hasVector()) {
705 }
706
707 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
708 // structure, but VAEND is a no-op.
712
713 if (Subtarget.isTargetzOS()) {
714 // Handle address space casts between mixed sized pointers.
717 }
718
720
721 // Codes for which we want to perform some z-specific combinations.
725 ISD::LOAD,
736 ISD::SDIV,
737 ISD::UDIV,
738 ISD::SREM,
739 ISD::UREM,
742
743 // Handle intrinsics.
746
747 // We want to use MVC in preference to even a single load/store pair.
748 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
750
751 // The main memset sequence is a byte store followed by an MVC.
752 // Two STC or MV..I stores win over that, but the kind of fused stores
753 // generated by target-independent code don't when the byte value is
754 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
755 // than "STC;MVC". Handle the choice in target-specific code instead.
756 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
758
759 // Default to having -disable-strictnode-mutation on
760 IsStrictFPEnabled = true;
761
762 if (Subtarget.isTargetzOS()) {
763 struct RTLibCallMapping {
764 RTLIB::Libcall Code;
765 const char *Name;
766 };
767 static RTLibCallMapping RTLibCallCommon[] = {
768#define HANDLE_LIBCALL(code, name) {RTLIB::code, name},
769#include "ZOSLibcallNames.def"
770 };
771 for (auto &E : RTLibCallCommon)
772 setLibcallName(E.Code, E.Name);
773 }
774}
775
776bool SystemZTargetLowering::useSoftFloat() const {
777  return Subtarget.hasSoftFloat();
778}
779
781 LLVMContext &, EVT VT) const {
782 if (!VT.isVector())
783 return MVT::i32;
785}
786
788 const MachineFunction &MF, EVT VT) const {
789 VT = VT.getScalarType();
790
791 if (!VT.isSimple())
792 return false;
793
794 switch (VT.getSimpleVT().SimpleTy) {
795 case MVT::f32:
796 case MVT::f64:
797 return true;
798 case MVT::f128:
799 return Subtarget.hasVectorEnhancements1();
800 default:
801 break;
802 }
803
804 return false;
805}
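// In practice this means scalar f32/f64 fmul+fadd pairs are worth fusing into
// the native multiply-and-add instructions (e.g. MAEBR/MADBR), while an f128
// fusion is only considered profitable once the vector enhancements facility 1
// supplies a 128-bit multiply-and-add; without it the combiner keeps the
// separate multiply and add.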
806
807// Return true if the constant can be generated with a vector instruction,
808// such as VGM, VGMB or VREPI.
809bool SystemZVectorConstantInfo::isVectorConstantLegal(
810    const SystemZSubtarget &Subtarget) {
811 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
812 if (!Subtarget.hasVector() ||
813 (isFP128 && !Subtarget.hasVectorEnhancements1()))
814 return false;
815
816 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
817 // preferred way of creating all-zero and all-one vectors so give it
818 // priority over other methods below.
819 unsigned Mask = 0;
820 unsigned I = 0;
821 for (; I < SystemZ::VectorBytes; ++I) {
822 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
823 if (Byte == 0xff)
824 Mask |= 1ULL << I;
825 else if (Byte != 0)
826 break;
827 }
828 if (I == SystemZ::VectorBytes) {
830 OpVals.push_back(Mask);
832 return true;
833 }
834
835 if (SplatBitSize > 64)
836 return false;
837
838 auto tryValue = [&](uint64_t Value) -> bool {
839 // Try VECTOR REPLICATE IMMEDIATE
840 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
841 if (isInt<16>(SignedValue)) {
842 OpVals.push_back(((unsigned) SignedValue));
845 SystemZ::VectorBits / SplatBitSize);
846 return true;
847 }
848 // Try VECTOR GENERATE MASK
849 unsigned Start, End;
850 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
851 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
852 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
853    // a SplatBitSize-bit value, so that 0 denotes 1 << (SplatBitSize-1).
854 OpVals.push_back(Start - (64 - SplatBitSize));
855 OpVals.push_back(End - (64 - SplatBitSize));
858 SystemZ::VectorBits / SplatBitSize);
859 return true;
860 }
861 return false;
862 };
863
864 // First try assuming that any undefined bits above the highest set bit
865 // and below the lowest set bit are 1s. This increases the likelihood of
866 // being able to use a sign-extended element value in VECTOR REPLICATE
867 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
868 uint64_t SplatBitsZ = SplatBits.getZExtValue();
869 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
870 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
871 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
872 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
873 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
874 if (tryValue(SplatBitsZ | Upper | Lower))
875 return true;
876
877 // Now try assuming that any undefined bits between the first and
878 // last defined set bits are set. This increases the chances of
879 // using a non-wraparound mask.
880 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
881 return tryValue(SplatBitsZ | Middle);
882}
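// Illustrative examples of the three strategies above (a sketch of typical
// encodings, not an exhaustive list):
//   - all-zero / all-ones vector:  VGBM 0x0000 / VGBM 0xffff
//   - v8i16 splat of 1:            VREPIH 1 (sign-extended 16-bit immediate)
//   - v4i32 splat of 0x00ff0000:   VGMF 8,15 (contiguous bit range, bit 0
//                                  being the element's most significant bit)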
883
885 if (IntImm.isSingleWord()) {
886 IntBits = APInt(128, IntImm.getZExtValue());
887 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
888 } else
889 IntBits = IntImm;
890 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
891
892 // Find the smallest splat.
893 SplatBits = IntImm;
894 unsigned Width = SplatBits.getBitWidth();
895 while (Width > 8) {
896 unsigned HalfSize = Width / 2;
897 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
898 APInt LowValue = SplatBits.trunc(HalfSize);
899
900 // If the two halves do not match, stop here.
901 if (HighValue != LowValue || 8 > HalfSize)
902 break;
903
904 SplatBits = HighValue;
905 Width = HalfSize;
906 }
907 SplatUndef = 0;
908 SplatBitSize = Width;
909}
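// For example, a 128-bit immediate made of eight copies of 0x0001 keeps
// halving while both halves match, ending with SplatBits == 0x0001 and
// SplatBitSize == 16; the loop stops before width 8 because the high and low
// bytes of 0x0001 differ.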
910
912 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
913 bool HasAnyUndefs;
914
915 // Get IntBits by finding the 128 bit splat.
916 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
917 true);
918
919 // Get SplatBits by finding the 8 bit or greater splat.
920 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
921 true);
922}
923
925 bool ForCodeSize) const {
926 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
927 if (Imm.isZero() || Imm.isNegZero())
928 return true;
929
931}
932
933/// Returns true if stack probing through inline assembly is requested.
935 // If the function specifically requests inline stack probes, emit them.
936 if (MF.getFunction().hasFnAttribute("probe-stack"))
937 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
938 "inline-asm";
939 return false;
940}
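// A minimal sketch of the IR that selects this path, using the standard
// "probe-stack" function attribute:
//
//   define void @f() #0 {
//     ret void
//   }
//   attributes #0 = { "probe-stack"="inline-asm" }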
941
945}
946
950}
951
954 // Don't expand subword operations as they require special treatment.
955 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
957
958 // Don't expand if there is a target instruction available.
959 if (Subtarget.hasInterlockedAccess1() &&
960 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
967
969}
970
972 // We can use CGFI or CLGFI.
973 return isInt<32>(Imm) || isUInt<32>(Imm);
974}
975
977 // We can use ALGFI or SLGFI.
978 return isUInt<32>(Imm) || isUInt<32>(-Imm);
979}
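// For example, comparing against or adding 4294967295 (0xffffffff) is legal
// (CLGFI/ALGFI), as is adding -4294967295 (SLGFI), but an immediate of 2^32
// satisfies neither check and is rejected.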
980
982 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
983 // Unaligned accesses should never be slower than the expanded version.
984 // We check specifically for aligned accesses in the few cases where
985 // they are required.
986 if (Fast)
987 *Fast = 1;
988 return true;
989}
990
991// Information about the addressing mode for a memory access.
993 // True if a long displacement is supported.
995
996 // True if use of index register is supported.
998
999 AddressingMode(bool LongDispl, bool IdxReg) :
1000 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
1001};
1002
1003// Return the desired addressing mode for a Load whose sole use (in the
1004// same block) is a Store.
1006 Type *Ty) {
1007 // With vector support a Load->Store combination may be combined to either
1008 // an MVC or vector operations and it seems to work best to allow the
1009 // vector addressing mode.
1010 if (HasVector)
1011 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1012
1013 // Otherwise only the MVC case is special.
1014 bool MVC = Ty->isIntegerTy(8);
1015 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1016}
1017
1018// Return the addressing mode which seems most desirable given an LLVM
1019// Instruction pointer.
1020static AddressingMode
1022 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1023 switch (II->getIntrinsicID()) {
1024 default: break;
1025 case Intrinsic::memset:
1026 case Intrinsic::memmove:
1027 case Intrinsic::memcpy:
1028 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1029 }
1030 }
1031
1032 if (isa<LoadInst>(I) && I->hasOneUse()) {
1033 auto *SingleUser = cast<Instruction>(*I->user_begin());
1034 if (SingleUser->getParent() == I->getParent()) {
1035 if (isa<ICmpInst>(SingleUser)) {
1036 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1037 if (C->getBitWidth() <= 64 &&
1038 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1039 // Comparison of memory with 16 bit signed / unsigned immediate
1040 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1041 } else if (isa<StoreInst>(SingleUser))
1042 // Load->Store
1043 return getLoadStoreAddrMode(HasVector, I->getType());
1044 }
1045 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1046 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1047 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1048 // Load->Store
1049 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1050 }
1051
1052 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1053
1054 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1055 // dependencies (LDE only supports small offsets).
1056 // * Utilize the vector registers to hold floating point
1057 // values (vector load / store instructions only support small
1058 // offsets).
1059
1060 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1061 I->getOperand(0)->getType());
1062 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1063 bool IsVectorAccess = MemAccessTy->isVectorTy();
1064
1065 // A store of an extracted vector element will be combined into a VSTE type
1066 // instruction.
1067 if (!IsVectorAccess && isa<StoreInst>(I)) {
1068 Value *DataOp = I->getOperand(0);
1069 if (isa<ExtractElementInst>(DataOp))
1070 IsVectorAccess = true;
1071 }
1072
1073 // A load which gets inserted into a vector element will be combined into a
1074 // VLE type instruction.
1075 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1076 User *LoadUser = *I->user_begin();
1077 if (isa<InsertElementInst>(LoadUser))
1078 IsVectorAccess = true;
1079 }
1080
1081 if (IsFPAccess || IsVectorAccess)
1082 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1083 }
1084
1085 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1086}
1087
1089 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1090 // Punt on globals for now, although they can be used in limited
1091 // RELATIVE LONG cases.
1092 if (AM.BaseGV)
1093 return false;
1094
1095 // Require a 20-bit signed offset.
1096 if (!isInt<20>(AM.BaseOffs))
1097 return false;
1098
1099 bool RequireD12 =
1100 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1101 AddressingMode SupportedAM(!RequireD12, true);
1102 if (I != nullptr)
1103 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1104
1105 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1106 return false;
1107
1108 if (!SupportedAM.IndexReg)
1109 // No indexing allowed.
1110 return AM.Scale == 0;
1111 else
1112 // Indexing is OK but no scale factor can be applied.
1113 return AM.Scale == 0 || AM.Scale == 1;
1114}
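// A few concrete outcomes of the rules above: "base + 0x7ffff" and
// "base + index" are both accepted for an ordinary i64 access, an address
// feeding a memcpy/memset/memmove intrinsic gets neither a long displacement
// nor an index register, and "base + 2*index" is always rejected because no
// scale factor can be encoded.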
1115
1117 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
1118 unsigned SrcAS, const AttributeList &FuncAttributes) const {
1119 const int MVCFastLen = 16;
1120
1121 if (Limit != ~unsigned(0)) {
1122 // Don't expand Op into scalar loads/stores in these cases:
1123 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1124 return false; // Small memcpy: Use MVC
1125 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1126 return false; // Small memset (first byte with STC/MVI): Use MVC
1127 if (Op.isZeroMemset())
1128 return false; // Memset zero: Use XC
1129 }
1130
1131 return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
1132 SrcAS, FuncAttributes);
1133}
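// For instance, with the checks above: a 16-byte memcpy that permits
// overlapping accesses is left unexpanded so it can become a single MVC, a
// zero-valued memset is left for XC, and everything else falls through to the
// generic scalar lowering.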
1134
1136 const AttributeList &FuncAttributes) const {
1137 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1138}
1139
1140bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1141 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1142 return false;
1143 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1144 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1145 return FromBits > ToBits;
1146}
1147
1149 if (!FromVT.isInteger() || !ToVT.isInteger())
1150 return false;
1151 unsigned FromBits = FromVT.getFixedSizeInBits();
1152 unsigned ToBits = ToVT.getFixedSizeInBits();
1153 return FromBits > ToBits;
1154}
1155
1156//===----------------------------------------------------------------------===//
1157// Inline asm support
1158//===----------------------------------------------------------------------===//
1159
1162 if (Constraint.size() == 1) {
1163 switch (Constraint[0]) {
1164 case 'a': // Address register
1165 case 'd': // Data register (equivalent to 'r')
1166 case 'f': // Floating-point register
1167 case 'h': // High-part register
1168 case 'r': // General-purpose register
1169 case 'v': // Vector register
1170 return C_RegisterClass;
1171
1172 case 'Q': // Memory with base and unsigned 12-bit displacement
1173 case 'R': // Likewise, plus an index
1174 case 'S': // Memory with base and signed 20-bit displacement
1175 case 'T': // Likewise, plus an index
1176 case 'm': // Equivalent to 'T'.
1177 return C_Memory;
1178
1179 case 'I': // Unsigned 8-bit constant
1180 case 'J': // Unsigned 12-bit constant
1181 case 'K': // Signed 16-bit constant
1182 case 'L': // Signed 20-bit displacement (on all targets we support)
1183 case 'M': // 0x7fffffff
1184 return C_Immediate;
1185
1186 default:
1187 break;
1188 }
1189 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1190 switch (Constraint[1]) {
1191 case 'Q': // Address with base and unsigned 12-bit displacement
1192 case 'R': // Likewise, plus an index
1193 case 'S': // Address with base and signed 20-bit displacement
1194 case 'T': // Likewise, plus an index
1195 return C_Address;
1196
1197 default:
1198 break;
1199 }
1200 }
1201 return TargetLowering::getConstraintType(Constraint);
1202}
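// A hedged example of how these letters surface in GCC-style inline assembly
// (mnemonic and variable names are purely illustrative):
//
//   long Res, Val = 100;
//   asm("aghi %0,%1" : "=d"(Res) : "K"(42), "0"(Val));
//
// Here 'd' asks for a general-purpose register and 'K' requires a signed
// 16-bit constant, matching the classifications returned above.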
1203
1206 const char *constraint) const {
1208 Value *CallOperandVal = info.CallOperandVal;
1209 // If we don't have a value, we can't do a match,
1210 // but allow it at the lowest weight.
1211 if (!CallOperandVal)
1212 return CW_Default;
1213 Type *type = CallOperandVal->getType();
1214 // Look at the constraint type.
1215 switch (*constraint) {
1216 default:
1218 break;
1219
1220 case 'a': // Address register
1221 case 'd': // Data register (equivalent to 'r')
1222 case 'h': // High-part register
1223 case 'r': // General-purpose register
1224 weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1225 break;
1226
1227 case 'f': // Floating-point register
1228 if (!useSoftFloat())
1229 weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1230 break;
1231
1232 case 'v': // Vector register
1233 if (Subtarget.hasVector())
1234 weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1235 : CW_Default;
1236 break;
1237
1238 case 'I': // Unsigned 8-bit constant
1239 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1240 if (isUInt<8>(C->getZExtValue()))
1241 weight = CW_Constant;
1242 break;
1243
1244 case 'J': // Unsigned 12-bit constant
1245 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1246 if (isUInt<12>(C->getZExtValue()))
1247 weight = CW_Constant;
1248 break;
1249
1250 case 'K': // Signed 16-bit constant
1251 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1252 if (isInt<16>(C->getSExtValue()))
1253 weight = CW_Constant;
1254 break;
1255
1256 case 'L': // Signed 20-bit displacement (on all targets we support)
1257 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1258 if (isInt<20>(C->getSExtValue()))
1259 weight = CW_Constant;
1260 break;
1261
1262 case 'M': // 0x7fffffff
1263 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1264 if (C->getZExtValue() == 0x7fffffff)
1265 weight = CW_Constant;
1266 break;
1267 }
1268 return weight;
1269}
1270
1271// Parse a "{tNNN}" register constraint for which the register type "t"
1272// has already been verified. RC is the class associated with "t" and
1273// Map maps 0-based register numbers to LLVM register numbers.
1274static std::pair<unsigned, const TargetRegisterClass *>
1276 const unsigned *Map, unsigned Size) {
1277 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1278 if (isdigit(Constraint[2])) {
1279 unsigned Index;
1280 bool Failed =
1281 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1282 if (!Failed && Index < Size && Map[Index])
1283 return std::make_pair(Map[Index], RC);
1284 }
1285 return std::make_pair(0U, nullptr);
1286}
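// For example, the constraint "{r5}" combined with a 64-bit value type
// resolves through the GPR map to the 64-bit register R5D, while an
// out-of-range name such as "{r99}" falls through to the (0, nullptr)
// failure result.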
1287
1288std::pair<unsigned, const TargetRegisterClass *>
1290 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1291 if (Constraint.size() == 1) {
1292 // GCC Constraint Letters
1293 switch (Constraint[0]) {
1294 default: break;
1295 case 'd': // Data register (equivalent to 'r')
1296 case 'r': // General-purpose register
1297 if (VT.getSizeInBits() == 64)
1298 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1299 else if (VT.getSizeInBits() == 128)
1300 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1301 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1302
1303 case 'a': // Address register
1304 if (VT == MVT::i64)
1305 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1306 else if (VT == MVT::i128)
1307 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1308 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1309
1310 case 'h': // High-part register (an LLVM extension)
1311 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1312
1313 case 'f': // Floating-point register
1314 if (!useSoftFloat()) {
1315 if (VT.getSizeInBits() == 64)
1316 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1317 else if (VT.getSizeInBits() == 128)
1318 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1319 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1320 }
1321 break;
1322
1323 case 'v': // Vector register
1324 if (Subtarget.hasVector()) {
1325 if (VT.getSizeInBits() == 32)
1326 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1327 if (VT.getSizeInBits() == 64)
1328 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1329 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1330 }
1331 break;
1332 }
1333 }
1334 if (Constraint.starts_with("{")) {
1335
1336 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1337 // to check the size on.
1338 auto getVTSizeInBits = [&VT]() {
1339 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1340 };
1341
1342 // We need to override the default register parsing for GPRs and FPRs
1343 // because the interpretation depends on VT. The internal names of
1344 // the registers are also different from the external names
1345 // (F0D and F0S instead of F0, etc.).
1346 if (Constraint[1] == 'r') {
1347 if (getVTSizeInBits() == 32)
1348 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1350 if (getVTSizeInBits() == 128)
1351 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1353 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1355 }
1356 if (Constraint[1] == 'f') {
1357 if (useSoftFloat())
1358 return std::make_pair(
1359 0u, static_cast<const TargetRegisterClass *>(nullptr));
1360 if (getVTSizeInBits() == 32)
1361 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1363 if (getVTSizeInBits() == 128)
1364 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1366 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1368 }
1369 if (Constraint[1] == 'v') {
1370 if (!Subtarget.hasVector())
1371 return std::make_pair(
1372 0u, static_cast<const TargetRegisterClass *>(nullptr));
1373 if (getVTSizeInBits() == 32)
1374 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1376 if (getVTSizeInBits() == 64)
1377 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1379 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1381 }
1382 }
1383 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1384}
1385
1386// FIXME? Maybe this could be a TableGen attribute on some registers and
1387// this table could be generated automatically from RegInfo.
1390 const MachineFunction &MF) const {
1391 Register Reg =
1393 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0)
1394 .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0)
1395 .Default(0);
1396
1397 if (Reg)
1398 return Reg;
1399 report_fatal_error("Invalid register name global variable");
1400}
1401
1403 const Constant *PersonalityFn) const {
1404 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1405}
1406
1408 const Constant *PersonalityFn) const {
1409 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1410}
1411
1413 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1414 SelectionDAG &DAG) const {
1415 // Only support length 1 constraints for now.
1416 if (Constraint.size() == 1) {
1417 switch (Constraint[0]) {
1418 case 'I': // Unsigned 8-bit constant
1419 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1420 if (isUInt<8>(C->getZExtValue()))
1421 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1422 Op.getValueType()));
1423 return;
1424
1425 case 'J': // Unsigned 12-bit constant
1426 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1427 if (isUInt<12>(C->getZExtValue()))
1428 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1429 Op.getValueType()));
1430 return;
1431
1432 case 'K': // Signed 16-bit constant
1433 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1434 if (isInt<16>(C->getSExtValue()))
1435 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1436 Op.getValueType()));
1437 return;
1438
1439 case 'L': // Signed 20-bit displacement (on all targets we support)
1440 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1441 if (isInt<20>(C->getSExtValue()))
1442 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1443 Op.getValueType()));
1444 return;
1445
1446 case 'M': // 0x7fffffff
1447 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1448 if (C->getZExtValue() == 0x7fffffff)
1449 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1450 Op.getValueType()));
1451 return;
1452 }
1453 }
1454 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1455}
1456
1457//===----------------------------------------------------------------------===//
1458// Calling conventions
1459//===----------------------------------------------------------------------===//
1460
1461#include "SystemZGenCallingConv.inc"
1462
1464 CallingConv::ID) const {
1465 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1466 SystemZ::R14D, 0 };
1467 return ScratchRegs;
1468}
1469
1471 Type *ToType) const {
1472 return isTruncateFree(FromType, ToType);
1473}
1474
1476 return CI->isTailCall();
1477}
1478
1479// Value is a value that has been passed to us in the location described by VA
1480// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1481// any loads onto Chain.
1483 CCValAssign &VA, SDValue Chain,
1484 SDValue Value) {
1485 // If the argument has been promoted from a smaller type, insert an
1486 // assertion to capture this.
1487 if (VA.getLocInfo() == CCValAssign::SExt)
1489 DAG.getValueType(VA.getValVT()));
1490 else if (VA.getLocInfo() == CCValAssign::ZExt)
1492 DAG.getValueType(VA.getValVT()));
1493
1494 if (VA.isExtInLoc())
1495 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1496 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1497 // If this is a short vector argument loaded from the stack,
1498 // extend from i64 to full vector size and then bitcast.
1499 assert(VA.getLocVT() == MVT::i64);
1500 assert(VA.getValVT().isVector());
1501 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1502 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1503 } else
1504 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1505 return Value;
1506}
1507
1508// Value is a value of type VA.getValVT() that we need to copy into
1509// the location described by VA. Return a copy of Value converted to
1510// VA.getLocVT(). The caller is responsible for handling indirect values.
1512 CCValAssign &VA, SDValue Value) {
1513 switch (VA.getLocInfo()) {
1514 case CCValAssign::SExt:
1515 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1516 case CCValAssign::ZExt:
1517 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1518 case CCValAssign::AExt:
1519 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1520 case CCValAssign::BCvt: {
1521 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1522 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1523 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1524 // For an f32 vararg we need to first promote it to an f64 and then
1525 // bitcast it to an i64.
1526 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1527 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1528 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1529 ? MVT::v2i64
1530 : VA.getLocVT();
1531 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1532 // For ELF, this is a short vector argument to be stored to the stack,
1533 // bitcast to v2i64 and then extract first element.
1534 if (BitCastToType == MVT::v2i64)
1535 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1536 DAG.getConstant(0, DL, MVT::i32));
1537 return Value;
1538 }
1539 case CCValAssign::Full:
1540 return Value;
1541 default:
1542 llvm_unreachable("Unhandled getLocInfo()");
1543 }
1544}
1545
1547 SDLoc DL(In);
1548 SDValue Lo, Hi;
1549 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1550 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1551 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1552 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1553 DAG.getConstant(64, DL, MVT::i32)));
1554 } else {
1555 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1556 }
1557
1558 // FIXME: If v2i64 were a legal type, we could use it instead of
1559 // Untyped here. This might enable improved folding.
1560 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1561 MVT::Untyped, Hi, Lo);
1562 return SDValue(Pair, 0);
1563}
1564
1566 SDLoc DL(In);
1567 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1568 DL, MVT::i64, In);
1569 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1570 DL, MVT::i64, In);
1571
1572 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1573 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1574 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1575 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1576 DAG.getConstant(64, DL, MVT::i32));
1577 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1578 } else {
1579 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1580 }
1581}
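// Together with lowerI128ToGR128 above, this moves an i128 between its
// SelectionDAG form and an even/odd 64-bit register pair: the former feeds
// the high and low halves into the PAIR128 pseudo, and this routine extracts
// subreg_h64/subreg_l64 and reassembles (Hi << 64) | Lo.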
1582
1584 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1585 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1586 EVT ValueVT = Val.getValueType();
1587 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1588 // Inline assembly operand.
1589 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1590 return true;
1591 }
1592
1593 return false;
1594}
1595
1597 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1598 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1599 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1600 // Inline assembly operand.
1601 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1602 return DAG.getBitcast(ValueVT, Res);
1603 }
1604
1605 return SDValue();
1606}
1607
1609 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1610 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1611 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1613 MachineFrameInfo &MFI = MF.getFrameInfo();
1615 SystemZMachineFunctionInfo *FuncInfo =
1617 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1618 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1619
1620 // Assign locations to all of the incoming arguments.
1622 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1623 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1624 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
1625
1626 unsigned NumFixedGPRs = 0;
1627 unsigned NumFixedFPRs = 0;
1628 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1629 SDValue ArgValue;
1630 CCValAssign &VA = ArgLocs[I];
1631 EVT LocVT = VA.getLocVT();
1632 if (VA.isRegLoc()) {
1633 // Arguments passed in registers
1634 const TargetRegisterClass *RC;
1635 switch (LocVT.getSimpleVT().SimpleTy) {
1636 default:
1637 // Integers smaller than i64 should be promoted to i64.
1638 llvm_unreachable("Unexpected argument type");
1639 case MVT::i32:
1640 NumFixedGPRs += 1;
1641 RC = &SystemZ::GR32BitRegClass;
1642 break;
1643 case MVT::i64:
1644 NumFixedGPRs += 1;
1645 RC = &SystemZ::GR64BitRegClass;
1646 break;
1647 case MVT::f32:
1648 NumFixedFPRs += 1;
1649 RC = &SystemZ::FP32BitRegClass;
1650 break;
1651 case MVT::f64:
1652 NumFixedFPRs += 1;
1653 RC = &SystemZ::FP64BitRegClass;
1654 break;
1655 case MVT::f128:
1656 NumFixedFPRs += 2;
1657 RC = &SystemZ::FP128BitRegClass;
1658 break;
1659 case MVT::v16i8:
1660 case MVT::v8i16:
1661 case MVT::v4i32:
1662 case MVT::v2i64:
1663 case MVT::v4f32:
1664 case MVT::v2f64:
1665 RC = &SystemZ::VR128BitRegClass;
1666 break;
1667 }
1668
1669 Register VReg = MRI.createVirtualRegister(RC);
1670 MRI.addLiveIn(VA.getLocReg(), VReg);
1671 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1672 } else {
1673 assert(VA.isMemLoc() && "Argument not register or memory");
1674
1675 // Create the frame index object for this incoming parameter.
1676 // FIXME: Pre-include call frame size in the offset, should not
1677 // need to manually add it here.
1678 int64_t ArgSPOffset = VA.getLocMemOffset();
1679 if (Subtarget.isTargetXPLINK64()) {
1680 auto &XPRegs =
1682 ArgSPOffset += XPRegs.getCallFrameSize();
1683 }
1684 int FI =
1685 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1686
1687 // Create the SelectionDAG nodes corresponding to a load
1688 // from this parameter. Unpromoted ints and floats are
1689 // passed as right-justified 8-byte values.
1690 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1691 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1692 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1693 DAG.getIntPtrConstant(4, DL));
1694 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1696 }
1697
1698 // Convert the value of the argument register into the value that's
1699 // being passed.
1700 if (VA.getLocInfo() == CCValAssign::Indirect) {
1701 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1703 // If the original argument was split (e.g. i128), we need
1704 // to load all parts of it here (using the same address).
1705 unsigned ArgIndex = Ins[I].OrigArgIndex;
1706 assert (Ins[I].PartOffset == 0);
1707 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1708 CCValAssign &PartVA = ArgLocs[I + 1];
1709 unsigned PartOffset = Ins[I + 1].PartOffset;
1710 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1711 DAG.getIntPtrConstant(PartOffset, DL));
1712 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1714 ++I;
1715 }
1716 } else
1717 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1718 }
1719
1720 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
1721 // Save the number of non-varargs registers for later use by va_start, etc.
1722 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1723 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1724
1725 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1726 Subtarget.getSpecialRegisters());
1727
1728 // Likewise the address (in the form of a frame index) of where the
1729 // first stack vararg would be. The 1-byte size here is arbitrary.
1730 // FIXME: Pre-include call frame size in the offset, should not
1731 // need to manually add it here.
1732 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
1733 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
1734 FuncInfo->setVarArgsFrameIndex(FI);
1735 }
1736
1737 if (IsVarArg && Subtarget.isTargetELF()) {
1738 // Save the number of non-varargs registers for later use by va_start, etc.
1739 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1740 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1741
1742 // Likewise the address (in the form of a frame index) of where the
1743 // first stack vararg would be. The 1-byte size here is arbitrary.
1744 int64_t VarArgsOffset = CCInfo.getStackSize();
1745 FuncInfo->setVarArgsFrameIndex(
1746 MFI.CreateFixedObject(1, VarArgsOffset, true));
1747
1748 // ...and a similar frame index for the caller-allocated save area
1749 // that will be used to store the incoming registers.
1750 int64_t RegSaveOffset =
1751 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1752 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1753 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1754
1755 // Store the FPR varargs in the reserved frame slots. (We store the
1756 // GPRs as part of the prologue.)
1757 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1759 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1760 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1761 int FI =
1763 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1765 &SystemZ::FP64BitRegClass);
1766 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1767 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1769 }
1770 // Join the stores, which are independent of one another.
1771 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1772 ArrayRef(&MemOps[NumFixedFPRs],
1773 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
1774 }
1775 }
1776
1777 if (Subtarget.isTargetXPLINK64()) {
1778    // Create virtual register for handling incoming "ADA" special register (R5)
1779 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
1780 Register ADAvReg = MRI.createVirtualRegister(RC);
1781 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1782 Subtarget.getSpecialRegisters());
1783 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
1784 FuncInfo->setADAVirtualRegister(ADAvReg);
1785 }
1786 return Chain;
1787}
1788
1789static bool canUseSiblingCall(const CCState &ArgCCInfo,
1792 // Punt if there are any indirect or stack arguments, or if the call
1793 // needs the callee-saved argument register R6, or if the call uses
1794 // the callee-saved register arguments SwiftSelf and SwiftError.
1795 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1796 CCValAssign &VA = ArgLocs[I];
1798 return false;
1799 if (!VA.isRegLoc())
1800 return false;
1801 Register Reg = VA.getLocReg();
1802 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1803 return false;
1804 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1805 return false;
1806 }
1807 return true;
1808}
1809
1810static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL,
1811                           unsigned Offset, bool LoadAdr = false) {
1814 unsigned ADAvReg = MFI->getADAVirtualRegister();
1816
1817 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
1818 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
1819
1820 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
1821 if (!LoadAdr)
1822 Result = DAG.getLoad(
1823 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
1825
1826 return Result;
1827}
1828
1829// ADA access using Global value
1830// Note: for functions, address of descriptor is returned
1832 EVT PtrVT) {
1833 unsigned ADAtype;
1834 bool LoadAddr = false;
1835 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
1836 bool IsFunction =
1837 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
1838 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
1839
1840 if (IsFunction) {
1841 if (IsInternal) {
1843 LoadAddr = true;
1844 } else
1846 } else {
1848 }
1849 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
1850
1851 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
1852}
1853
1854static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
1855 SDLoc &DL, SDValue &Chain) {
1856 unsigned ADADelta = 0; // ADA offset in desc.
1857 unsigned EPADelta = 8; // EPA offset in desc.
1860
1861 // XPLink calling convention.
1862 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1863 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
1864 G->getGlobal()->hasPrivateLinkage());
1865 if (IsInternal) {
1868 unsigned ADAvReg = MFI->getADAVirtualRegister();
1869 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
1870 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1871 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1872 return true;
1873 } else {
1875 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1876 ADA = getADAEntry(DAG, GA, DL, ADADelta);
1877 Callee = getADAEntry(DAG, GA, DL, EPADelta);
1878 }
1879 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1881 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1882 ADA = getADAEntry(DAG, ES, DL, ADADelta);
1883 Callee = getADAEntry(DAG, ES, DL, EPADelta);
1884 } else {
1885 // Function pointer case
1886 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1887 DAG.getConstant(ADADelta, DL, PtrVT));
1888 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
1890 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1891 DAG.getConstant(EPADelta, DL, PtrVT));
1892 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
1894 }
1895 return false;
1896}
1897
1898SDValue
1900 SmallVectorImpl<SDValue> &InVals) const {
1901 SelectionDAG &DAG = CLI.DAG;
1902 SDLoc &DL = CLI.DL;
1904 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1906 SDValue Chain = CLI.Chain;
1907 SDValue Callee = CLI.Callee;
1908 bool &IsTailCall = CLI.IsTailCall;
1909 CallingConv::ID CallConv = CLI.CallConv;
1910 bool IsVarArg = CLI.IsVarArg;
1912 EVT PtrVT = getPointerTy(MF.getDataLayout());
1913 LLVMContext &Ctx = *DAG.getContext();
1915
1916  // FIXME: z/OS support to be added later.
1917 if (Subtarget.isTargetXPLINK64())
1918 IsTailCall = false;
1919
1920 // Analyze the operands of the call, assigning locations to each operand.
1922 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1923 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1924
1925 // We don't support GuaranteedTailCallOpt, only automatically-detected
1926 // sibling calls.
1927 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1928 IsTailCall = false;
1929
1930 // Get a count of how many bytes are to be pushed on the stack.
1931 unsigned NumBytes = ArgCCInfo.getStackSize();
1932
1933 // Mark the start of the call.
1934 if (!IsTailCall)
1935 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1936
1937 // Copy argument values to their designated locations.
1939 SmallVector<SDValue, 8> MemOpChains;
1940 SDValue StackPtr;
1941 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1942 CCValAssign &VA = ArgLocs[I];
1943 SDValue ArgValue = OutVals[I];
1944
1945 if (VA.getLocInfo() == CCValAssign::Indirect) {
1946 // Store the argument in a stack slot and pass its address.
1947 unsigned ArgIndex = Outs[I].OrigArgIndex;
1948 EVT SlotVT;
1949 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1950 // Allocate the full stack space for a promoted (and split) argument.
1951 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1952 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1953 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1954 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1955 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1956 } else {
1957 SlotVT = Outs[I].VT;
1958 }
1959 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1960 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1961 MemOpChains.push_back(
1962 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1964 // If the original argument was split (e.g. i128), we need
1965 // to store all parts of it here (and pass just one address).
1966 assert (Outs[I].PartOffset == 0);
1967 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1968 SDValue PartValue = OutVals[I + 1];
1969 unsigned PartOffset = Outs[I + 1].PartOffset;
1970 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1971 DAG.getIntPtrConstant(PartOffset, DL));
1972 MemOpChains.push_back(
1973 DAG.getStore(Chain, DL, PartValue, Address,
1975 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1976 SlotVT.getStoreSize()) && "Not enough space for argument part!");
1977 ++I;
1978 }
1979 ArgValue = SpillSlot;
1980 } else
1981 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1982
1983 if (VA.isRegLoc()) {
1984      // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcast to an
1985 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
1986 // and low values.
1987 if (VA.getLocVT() == MVT::i128)
1988 ArgValue = lowerI128ToGR128(DAG, ArgValue);
1989 // Queue up the argument copies and emit them at the end.
1990 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1991 } else {
1992 assert(VA.isMemLoc() && "Argument not register or memory");
1993
1994 // Work out the address of the stack slot. Unpromoted ints and
1995 // floats are passed as right-justified 8-byte values.
1996 if (!StackPtr.getNode())
1997 StackPtr = DAG.getCopyFromReg(Chain, DL,
1998 Regs->getStackPointerRegister(), PtrVT);
1999 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
2000 VA.getLocMemOffset();
2001 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
2002 Offset += 4;
2003 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2005
2006 // Emit the store.
2007 MemOpChains.push_back(
2008 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2009
2010 // Although long doubles or vectors are passed through the stack when
2011 // they are vararg (non-fixed arguments), if a long double or vector
2012 // occupies the third and fourth slot of the argument list GPR3 should
2013 // still shadow the third slot of the argument list.
2014 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2015 SDValue ShadowArgValue =
2016 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2017 DAG.getIntPtrConstant(1, DL));
2018 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2019 }
2020 }
2021 }
2022
2023 // Join the stores, which are independent of one another.
2024 if (!MemOpChains.empty())
2025 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2026
2027 // Accept direct calls by converting symbolic call addresses to the
2028 // associated Target* opcodes. Force %r1 to be used for indirect
2029 // tail calls.
2030 SDValue Glue;
2031
2032 if (Subtarget.isTargetXPLINK64()) {
2033 SDValue ADA;
2034 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2035 if (!IsBRASL) {
2036 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2037 ->getAddressOfCalleeRegister();
2038 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2039 Glue = Chain.getValue(1);
2040 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2041 }
2042 RegsToPass.push_back(std::make_pair(
2043 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2044 } else {
2045 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2046 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2047 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2048 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2049 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2050 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2051 } else if (IsTailCall) {
2052 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2053 Glue = Chain.getValue(1);
2054 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2055 }
2056 }
2057
2058 // Build a sequence of copy-to-reg nodes, chained and glued together.
2059 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
2060 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
2061 RegsToPass[I].second, Glue);
2062 Glue = Chain.getValue(1);
2063 }
2064
2065 // The first call operand is the chain and the second is the target address.
2066 SmallVector<SDValue, 8> Ops;
2067 Ops.push_back(Chain);
2068 Ops.push_back(Callee);
2069
2070 // Add argument registers to the end of the list so that they are
2071 // known live into the call.
2072 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
2073 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
2074 RegsToPass[I].second.getValueType()));
2075
2076 // Add a register mask operand representing the call-preserved registers.
2077 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2078 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2079 assert(Mask && "Missing call preserved mask for calling convention");
2080 Ops.push_back(DAG.getRegisterMask(Mask));
2081
2082 // Glue the call to the argument copies, if any.
2083 if (Glue.getNode())
2084 Ops.push_back(Glue);
2085
2086 // Emit the call.
2087 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2088 if (IsTailCall) {
2089 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2090 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2091 return Ret;
2092 }
2093 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2094 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2095 Glue = Chain.getValue(1);
2096
2097 // Mark the end of the call, which is glued to the call itself.
2098 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2099 Glue = Chain.getValue(1);
2100
2101 // Assign locations to each value returned by this call.
2102 SmallVector<CCValAssign, 16> RetLocs;
2103 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2104 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2105
2106 // Copy all of the result registers out of their specified physreg.
2107 for (CCValAssign &VA : RetLocs) {
2108 // Copy the value out, gluing the copy to the end of the call sequence.
2109 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2110 VA.getLocVT(), Glue);
2111 Chain = RetValue.getValue(1);
2112 Glue = RetValue.getValue(2);
2113
2114 // Convert the value of the return register into the value that's
2115 // being returned.
2116 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2117 }
2118
2119 return Chain;
2120}
2121
2122// Generate a call taking the given operands as arguments and returning a
2123// result of type RetVT.
2124 std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
2125 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2126 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2127 bool DoesNotReturn, bool IsReturnValueUsed) const {
2128 TargetLowering::ArgListTy Args;
2129 Args.reserve(Ops.size());
2130
2131 TargetLowering::ArgListEntry Entry;
2132 for (SDValue Op : Ops) {
2133 Entry.Node = Op;
2134 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
2135 Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2136 Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2137 Args.push_back(Entry);
2138 }
2139
2140 SDValue Callee =
2141 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2142
2143 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2144 CallLoweringInfo CLI(DAG);
2145 bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
2146 CLI.setDebugLoc(DL)
2147 .setChain(Chain)
2148 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2149 .setNoReturn(DoesNotReturn)
2150 .setDiscardResult(!IsReturnValueUsed)
2151 .setSExtResult(SignExtend)
2152 .setZExtResult(!SignExtend);
2153 return LowerCallTo(CLI);
2154}
2155
2156 bool SystemZTargetLowering::
2157 CanLowerReturn(CallingConv::ID CallConv,
2158 MachineFunction &MF, bool isVarArg,
2159 const SmallVectorImpl<ISD::OutputArg> &Outs,
2160 LLVMContext &Context) const {
2161 // Special case that we cannot easily detect in RetCC_SystemZ since
2162 // i128 may not be a legal type.
2163 for (auto &Out : Outs)
2164 if (Out.ArgVT == MVT::i128)
2165 return false;
2166
2167 SmallVector<CCValAssign, 16> RetLocs;
2168 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
2169 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2170}
2171
2172SDValue
2173 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2174 bool IsVarArg,
2175 const SmallVectorImpl<ISD::OutputArg> &Outs,
2176 const SmallVectorImpl<SDValue> &OutVals,
2177 const SDLoc &DL, SelectionDAG &DAG) const {
2178 MachineFunction &MF = DAG.getMachineFunction();
2179
2180 // Assign locations to each returned value.
2181 SmallVector<CCValAssign, 16> RetLocs;
2182 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2183 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2184
2185 // Quick exit for void returns
2186 if (RetLocs.empty())
2187 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2188
2189 if (CallConv == CallingConv::GHC)
2190 report_fatal_error("GHC functions return void only");
2191
2192 // Copy the result values into the output registers.
2193 SDValue Glue;
2194 SmallVector<SDValue, 4> RetOps;
2195 RetOps.push_back(Chain);
2196 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2197 CCValAssign &VA = RetLocs[I];
2198 SDValue RetValue = OutVals[I];
2199
2200 // Make the return register live on exit.
2201 assert(VA.isRegLoc() && "Can only return in registers!");
2202
2203 // Promote the value as required.
2204 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2205
2206 // Chain and glue the copies together.
2207 Register Reg = VA.getLocReg();
2208 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2209 Glue = Chain.getValue(1);
2210 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2211 }
2212
2213 // Update chain and glue.
2214 RetOps[0] = Chain;
2215 if (Glue.getNode())
2216 RetOps.push_back(Glue);
2217
2218 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2219}
2220
2221// Return true if Op is an intrinsic node with chain that returns the CC value
2222// as its only (other) argument. Provide the associated SystemZISD opcode and
2223// the mask of valid CC values if so.
2224static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2225 unsigned &CCValid) {
2226 unsigned Id = Op.getConstantOperandVal(1);
2227 switch (Id) {
2228 case Intrinsic::s390_tbegin:
2229 Opcode = SystemZISD::TBEGIN;
2230 CCValid = SystemZ::CCMASK_TBEGIN;
2231 return true;
2232
2233 case Intrinsic::s390_tbegin_nofloat:
2234 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2235 CCValid = SystemZ::CCMASK_TBEGIN;
2236 return true;
2237
2238 case Intrinsic::s390_tend:
2239 Opcode = SystemZISD::TEND;
2240 CCValid = SystemZ::CCMASK_TEND;
2241 return true;
2242
2243 default:
2244 return false;
2245 }
2246}
2247
2248// Return true if Op is an intrinsic node without chain that returns the
2249// CC value as its final argument. Provide the associated SystemZISD
2250// opcode and the mask of valid CC values if so.
2251static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2252 unsigned Id = Op.getConstantOperandVal(0);
2253 switch (Id) {
2254 case Intrinsic::s390_vpkshs:
2255 case Intrinsic::s390_vpksfs:
2256 case Intrinsic::s390_vpksgs:
2257 Opcode = SystemZISD::PACKS_CC;
2258 CCValid = SystemZ::CCMASK_VCMP;
2259 return true;
2260
2261 case Intrinsic::s390_vpklshs:
2262 case Intrinsic::s390_vpklsfs:
2263 case Intrinsic::s390_vpklsgs:
2264 Opcode = SystemZISD::PACKLS_CC;
2265 CCValid = SystemZ::CCMASK_VCMP;
2266 return true;
2267
2268 case Intrinsic::s390_vceqbs:
2269 case Intrinsic::s390_vceqhs:
2270 case Intrinsic::s390_vceqfs:
2271 case Intrinsic::s390_vceqgs:
2272 Opcode = SystemZISD::VICMPES;
2273 CCValid = SystemZ::CCMASK_VCMP;
2274 return true;
2275
2276 case Intrinsic::s390_vchbs:
2277 case Intrinsic::s390_vchhs:
2278 case Intrinsic::s390_vchfs:
2279 case Intrinsic::s390_vchgs:
2280 Opcode = SystemZISD::VICMPHS;
2281 CCValid = SystemZ::CCMASK_VCMP;
2282 return true;
2283
2284 case Intrinsic::s390_vchlbs:
2285 case Intrinsic::s390_vchlhs:
2286 case Intrinsic::s390_vchlfs:
2287 case Intrinsic::s390_vchlgs:
2288 Opcode = SystemZISD::VICMPHLS;
2289 CCValid = SystemZ::CCMASK_VCMP;
2290 return true;
2291
2292 case Intrinsic::s390_vtm:
2293 Opcode = SystemZISD::VTM;
2294 CCValid = SystemZ::CCMASK_VCMP;
2295 return true;
2296
2297 case Intrinsic::s390_vfaebs:
2298 case Intrinsic::s390_vfaehs:
2299 case Intrinsic::s390_vfaefs:
2300 Opcode = SystemZISD::VFAE_CC;
2301 CCValid = SystemZ::CCMASK_ANY;
2302 return true;
2303
2304 case Intrinsic::s390_vfaezbs:
2305 case Intrinsic::s390_vfaezhs:
2306 case Intrinsic::s390_vfaezfs:
2307 Opcode = SystemZISD::VFAEZ_CC;
2308 CCValid = SystemZ::CCMASK_ANY;
2309 return true;
2310
2311 case Intrinsic::s390_vfeebs:
2312 case Intrinsic::s390_vfeehs:
2313 case Intrinsic::s390_vfeefs:
2314 Opcode = SystemZISD::VFEE_CC;
2315 CCValid = SystemZ::CCMASK_ANY;
2316 return true;
2317
2318 case Intrinsic::s390_vfeezbs:
2319 case Intrinsic::s390_vfeezhs:
2320 case Intrinsic::s390_vfeezfs:
2321 Opcode = SystemZISD::VFEEZ_CC;
2322 CCValid = SystemZ::CCMASK_ANY;
2323 return true;
2324
2325 case Intrinsic::s390_vfenebs:
2326 case Intrinsic::s390_vfenehs:
2327 case Intrinsic::s390_vfenefs:
2328 Opcode = SystemZISD::VFENE_CC;
2329 CCValid = SystemZ::CCMASK_ANY;
2330 return true;
2331
2332 case Intrinsic::s390_vfenezbs:
2333 case Intrinsic::s390_vfenezhs:
2334 case Intrinsic::s390_vfenezfs:
2335 Opcode = SystemZISD::VFENEZ_CC;
2336 CCValid = SystemZ::CCMASK_ANY;
2337 return true;
2338
2339 case Intrinsic::s390_vistrbs:
2340 case Intrinsic::s390_vistrhs:
2341 case Intrinsic::s390_vistrfs:
2342 Opcode = SystemZISD::VISTR_CC;
2343 CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
2344 return true;
2345
2346 case Intrinsic::s390_vstrcbs:
2347 case Intrinsic::s390_vstrchs:
2348 case Intrinsic::s390_vstrcfs:
2349 Opcode = SystemZISD::VSTRC_CC;
2350 CCValid = SystemZ::CCMASK_ANY;
2351 return true;
2352
2353 case Intrinsic::s390_vstrczbs:
2354 case Intrinsic::s390_vstrczhs:
2355 case Intrinsic::s390_vstrczfs:
2356 Opcode = SystemZISD::VSTRCZ_CC;
2357 CCValid = SystemZ::CCMASK_ANY;
2358 return true;
2359
2360 case Intrinsic::s390_vstrsb:
2361 case Intrinsic::s390_vstrsh:
2362 case Intrinsic::s390_vstrsf:
2363 Opcode = SystemZISD::VSTRS_CC;
2364 CCValid = SystemZ::CCMASK_ANY;
2365 return true;
2366
2367 case Intrinsic::s390_vstrszb:
2368 case Intrinsic::s390_vstrszh:
2369 case Intrinsic::s390_vstrszf:
2370 Opcode = SystemZISD::VSTRSZ_CC;
2371 CCValid = SystemZ::CCMASK_ANY;
2372 return true;
2373
2374 case Intrinsic::s390_vfcedbs:
2375 case Intrinsic::s390_vfcesbs:
2376 Opcode = SystemZISD::VFCMPES;
2377 CCValid = SystemZ::CCMASK_VCMP;
2378 return true;
2379
2380 case Intrinsic::s390_vfchdbs:
2381 case Intrinsic::s390_vfchsbs:
2382 Opcode = SystemZISD::VFCMPHS;
2383 CCValid = SystemZ::CCMASK_VCMP;
2384 return true;
2385
2386 case Intrinsic::s390_vfchedbs:
2387 case Intrinsic::s390_vfchesbs:
2388 Opcode = SystemZISD::VFCMPHES;
2389 CCValid = SystemZ::CCMASK_VCMP;
2390 return true;
2391
2392 case Intrinsic::s390_vftcidb:
2393 case Intrinsic::s390_vftcisb:
2394 Opcode = SystemZISD::VFTCI;
2395 CCValid = SystemZ::CCMASK_VCMP;
2396 return true;
2397
2398 case Intrinsic::s390_tdc:
2399 Opcode = SystemZISD::TDC;
2400 CCValid = SystemZ::CCMASK_TDC;
2401 return true;
2402
2403 default:
2404 return false;
2405 }
2406}
2407
2408// Emit an intrinsic with chain and an explicit CC register result.
2409 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2410 unsigned Opcode) {
2411 // Copy all operands except the intrinsic ID.
2412 unsigned NumOps = Op.getNumOperands();
2413 SmallVector<SDValue, 6> Ops;
2414 Ops.reserve(NumOps - 1);
2415 Ops.push_back(Op.getOperand(0));
2416 for (unsigned I = 2; I < NumOps; ++I)
2417 Ops.push_back(Op.getOperand(I));
2418
2419 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2420 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2421 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2422 SDValue OldChain = SDValue(Op.getNode(), 1);
2423 SDValue NewChain = SDValue(Intr.getNode(), 1);
2424 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2425 return Intr.getNode();
2426}
2427
2428// Emit an intrinsic with an explicit CC register result.
2429 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2430 unsigned Opcode) {
2431 // Copy all operands except the intrinsic ID.
2432 unsigned NumOps = Op.getNumOperands();
2433 SmallVector<SDValue, 6> Ops;
2434 Ops.reserve(NumOps - 1);
2435 for (unsigned I = 1; I < NumOps; ++I)
2436 Ops.push_back(Op.getOperand(I));
2437
2438 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2439 return Intr.getNode();
2440}
2441
2442// CC is a comparison that will be implemented using an integer or
2443// floating-point comparison. Return the condition code mask for
2444// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2445// unsigned comparisons and clear for signed ones. In the floating-point
2446// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2447 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2448 #define CONV(X) \
2449 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2450 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2451 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2452
2453 switch (CC) {
2454 default:
2455 llvm_unreachable("Invalid integer condition!");
2456
2457 CONV(EQ);
2458 CONV(NE);
2459 CONV(GT);
2460 CONV(GE);
2461 CONV(LT);
2462 CONV(LE);
2463
2464 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2465 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2466 }
2467#undef CONV
2468}
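// Worked example of the mapping above (illustrative annotation, not part of
// the original file): the CONV expansion makes the plain and ordered forms
// identical and adds the unordered bit only for the SETU* variants, e.g.
//   CCMaskForCondCode(ISD::SETLT)  == SystemZ::CCMASK_CMP_LT
//   CCMaskForCondCode(ISD::SETOLT) == SystemZ::CCMASK_CMP_LT
//   CCMaskForCondCode(ISD::SETULT) == SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_LT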
2469
2470// If C can be converted to a comparison against zero, adjust the operands
2471// as necessary.
2472static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2473 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2474 return;
2475
2476 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2477 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2478 return;
2479
2480 int64_t Value = ConstOp1->getSExtValue();
2481 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2482 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2483 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2484 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2485 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2486 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2487 }
2488}
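// Illustrative case (annotation, not from the original source): a signed test
// "x > -1" arrives with Value == -1 and CCMask == CCMASK_CMP_GT; XOR-ing in
// CCMASK_CMP_EQ rewrites it as "x >= 0", so the comparison can be done against
// the constant zero instead of -1.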
2489
2490// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2491// adjust the operands as necessary.
2492static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2493 Comparison &C) {
2494 // For us to make any changes, it must be a comparison between a single-use
2495 // load and a constant.
2496 if (!C.Op0.hasOneUse() ||
2497 C.Op0.getOpcode() != ISD::LOAD ||
2498 C.Op1.getOpcode() != ISD::Constant)
2499 return;
2500
2501 // We must have an 8- or 16-bit load.
2502 auto *Load = cast<LoadSDNode>(C.Op0);
2503 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2504 if ((NumBits != 8 && NumBits != 16) ||
2505 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2506 return;
2507
2508 // The load must be an extending one and the constant must be within the
2509 // range of the unextended value.
2510 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2511 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2512 return;
2513 uint64_t Value = ConstOp1->getZExtValue();
2514 uint64_t Mask = (1 << NumBits) - 1;
2515 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2516 // Make sure that ConstOp1 is in range of C.Op0.
2517 int64_t SignedValue = ConstOp1->getSExtValue();
2518 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2519 return;
2520 if (C.ICmpType != SystemZICMP::SignedOnly) {
2521 // Unsigned comparison between two sign-extended values is equivalent
2522 // to unsigned comparison between two zero-extended values.
2523 Value &= Mask;
2524 } else if (NumBits == 8) {
2525 // Try to treat the comparison as unsigned, so that we can use CLI.
2526 // Adjust CCMask and Value as necessary.
2527 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2528 // Test whether the high bit of the byte is set.
2529 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2530 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2531 // Test whether the high bit of the byte is clear.
2532 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2533 else
2534 // No instruction exists for this combination.
2535 return;
2536 C.ICmpType = SystemZICMP::UnsignedOnly;
2537 }
2538 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2539 if (Value > Mask)
2540 return;
2541 // If the constant is in range, we can use any comparison.
2542 C.ICmpType = SystemZICMP::Any;
2543 } else
2544 return;
2545
2546 // Make sure that the first operand is an i32 of the right extension type.
2547 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2548 ISD::SEXTLOAD :
2549 ISD::ZEXTLOAD);
2550 if (C.Op0.getValueType() != MVT::i32 ||
2551 Load->getExtensionType() != ExtType) {
2552 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2553 Load->getBasePtr(), Load->getPointerInfo(),
2554 Load->getMemoryVT(), Load->getAlign(),
2555 Load->getMemOperand()->getFlags());
2556 // Update the chain uses.
2557 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2558 }
2559
2560 // Make sure that the second operand is an i32 with the right value.
2561 if (C.Op1.getValueType() != MVT::i32 ||
2562 Value != ConstOp1->getZExtValue())
2563 C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2564}
2565
2566// Return true if Op is either an unextended load, or a load suitable
2567// for integer register-memory comparisons of type ICmpType.
2568static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2569 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2570 if (Load) {
2571 // There are no instructions to compare a register with a memory byte.
2572 if (Load->getMemoryVT() == MVT::i8)
2573 return false;
2574 // Otherwise decide on extension type.
2575 switch (Load->getExtensionType()) {
2576 case ISD::NON_EXTLOAD:
2577 return true;
2578 case ISD::SEXTLOAD:
2579 return ICmpType != SystemZICMP::UnsignedOnly;
2580 case ISD::ZEXTLOAD:
2581 return ICmpType != SystemZICMP::SignedOnly;
2582 default:
2583 break;
2584 }
2585 }
2586 return false;
2587}
2588
2589// Return true if it is better to swap the operands of C.
2590static bool shouldSwapCmpOperands(const Comparison &C) {
2591 // Leave i128 and f128 comparisons alone, since they have no memory forms.
2592 if (C.Op0.getValueType() == MVT::i128)
2593 return false;
2594 if (C.Op0.getValueType() == MVT::f128)
2595 return false;
2596
2597 // Always keep a floating-point constant second, since comparisons with
2598 // zero can use LOAD TEST and comparisons with other constants make a
2599 // natural memory operand.
2600 if (isa<ConstantFPSDNode>(C.Op1))
2601 return false;
2602
2603 // Never swap comparisons with zero since there are many ways to optimize
2604 // those later.
2605 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2606 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2607 return false;
2608
2609 // Also keep natural memory operands second if the loaded value is
2610 // only used here. Several comparisons have memory forms.
2611 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2612 return false;
2613
2614 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2615 // In that case we generally prefer the memory to be second.
2616 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2617 // The only exceptions are when the second operand is a constant and
2618 // we can use things like CHHSI.
2619 if (!ConstOp1)
2620 return true;
2621 // The unsigned memory-immediate instructions can handle 16-bit
2622 // unsigned integers.
2623 if (C.ICmpType != SystemZICMP::SignedOnly &&
2624 isUInt<16>(ConstOp1->getZExtValue()))
2625 return false;
2626 // The signed memory-immediate instructions can handle 16-bit
2627 // signed integers.
2628 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2629 isInt<16>(ConstOp1->getSExtValue()))
2630 return false;
2631 return true;
2632 }
2633
2634 // Try to promote the use of CGFR and CLGFR.
2635 unsigned Opcode0 = C.Op0.getOpcode();
2636 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2637 return true;
2638 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2639 return true;
2640 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
2641 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2642 C.Op0.getConstantOperandVal(1) == 0xffffffff)
2643 return true;
2644
2645 return false;
2646}
2647
2648// Check whether C tests for equality between X and Y and whether X - Y
2649// or Y - X is also computed. In that case it's better to compare the
2650// result of the subtraction against zero.
2651 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2652 Comparison &C) {
2653 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2654 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2655 for (SDNode *N : C.Op0->uses()) {
2656 if (N->getOpcode() == ISD::SUB &&
2657 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2658 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2659 // Disable the nsw and nuw flags: the backend needs to handle
2660 // overflow as well during comparison elimination.
2661 SDNodeFlags Flags = N->getFlags();
2662 Flags.setNoSignedWrap(false);
2663 Flags.setNoUnsignedWrap(false);
2664 N->setFlags(Flags);
2665 C.Op0 = SDValue(N, 0);
2666 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2667 return;
2668 }
2669 }
2670 }
2671}
2672
2673// Check whether C compares a floating-point value with zero and if that
2674// floating-point value is also negated. In this case we can use the
2675// negation to set CC, so avoiding separate LOAD AND TEST and
2676// LOAD (NEGATIVE/COMPLEMENT) instructions.
2677static void adjustForFNeg(Comparison &C) {
2678 // This optimization is invalid for strict comparisons, since FNEG
2679 // does not raise any exceptions.
2680 if (C.Chain)
2681 return;
2682 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2683 if (C1 && C1->isZero()) {
2684 for (SDNode *N : C.Op0->uses()) {
2685 if (N->getOpcode() == ISD::FNEG) {
2686 C.Op0 = SDValue(N, 0);
2687 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2688 return;
2689 }
2690 }
2691 }
2692}
2693
2694// Check whether C compares (shl X, 32) with 0 and whether X is
2695// also sign-extended. In that case it is better to test the result
2696// of the sign extension using LTGFR.
2697//
2698// This case is important because InstCombine transforms a comparison
2699// with (sext (trunc X)) into a comparison with (shl X, 32).
2700static void adjustForLTGFR(Comparison &C) {
2701 // Check for a comparison between (shl X, 32) and 0.
2702 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
2703 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
2704 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2705 if (C1 && C1->getZExtValue() == 32) {
2706 SDValue ShlOp0 = C.Op0.getOperand(0);
2707 // See whether X has any SIGN_EXTEND_INREG uses.
2708 for (SDNode *N : ShlOp0->uses()) {
2709 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2710 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2711 C.Op0 = SDValue(N, 0);
2712 return;
2713 }
2714 }
2715 }
2716 }
2717}
2718
2719// If C compares the truncation of an extending load, try to compare
2720// the untruncated value instead. This exposes more opportunities to
2721// reuse CC.
2722static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2723 Comparison &C) {
2724 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2725 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2726 C.Op1.getOpcode() == ISD::Constant &&
2727 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
2728 C.Op1->getAsZExtVal() == 0) {
2729 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2730 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
2731 C.Op0.getValueSizeInBits().getFixedValue()) {
2732 unsigned Type = L->getExtensionType();
2733 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2734 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2735 C.Op0 = C.Op0.getOperand(0);
2736 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2737 }
2738 }
2739 }
2740}
2741
2742// Return true if shift operation N has an in-range constant shift value.
2743// Store it in ShiftVal if so.
2744static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2745 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2746 if (!Shift)
2747 return false;
2748
2749 uint64_t Amount = Shift->getZExtValue();
2750 if (Amount >= N.getValueSizeInBits())
2751 return false;
2752
2753 ShiftVal = Amount;
2754 return true;
2755}
2756
2757// Check whether an AND with Mask is suitable for a TEST UNDER MASK
2758// instruction and whether the CC value is descriptive enough to handle
2759// a comparison of type Opcode between the AND result and CmpVal.
2760// CCMask says which comparison result is being tested and BitSize is
2761// the number of bits in the operands. If TEST UNDER MASK can be used,
2762// return the corresponding CC mask, otherwise return 0.
2763static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2764 uint64_t Mask, uint64_t CmpVal,
2765 unsigned ICmpType) {
2766 assert(Mask != 0 && "ANDs with zero should have been removed by now");
2767
2768 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2769 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2770 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2771 return 0;
2772
2773 // Work out the masks for the lowest and highest bits.
2774 uint64_t High = llvm::bit_floor(Mask);
2775 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
2776
2777 // Signed ordered comparisons are effectively unsigned if the sign
2778 // bit is dropped.
2779 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2780
2781 // Check for equality comparisons with 0, or the equivalent.
2782 if (CmpVal == 0) {
2783 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2784 return SystemZ::CCMASK_TM_ALL_0;
2785 if (CCMask == SystemZ::CCMASK_CMP_NE)
2786 return SystemZ::CCMASK_TM_SOME_1;
2787 }
2788 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2789 if (CCMask == SystemZ::CCMASK_CMP_LT)
2790 return SystemZ::CCMASK_TM_ALL_0;
2791 if (CCMask == SystemZ::CCMASK_CMP_GE)
2792 return SystemZ::CCMASK_TM_SOME_1;
2793 }
2794 if (EffectivelyUnsigned && CmpVal < Low) {
2795 if (CCMask == SystemZ::CCMASK_CMP_LE)
2796 return SystemZ::CCMASK_TM_ALL_0;
2797 if (CCMask == SystemZ::CCMASK_CMP_GT)
2798 return SystemZ::CCMASK_TM_SOME_1;
2799 }
2800
2801 // Check for equality comparisons with the mask, or the equivalent.
2802 if (CmpVal == Mask) {
2803 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2804 return SystemZ::CCMASK_TM_ALL_1;
2805 if (CCMask == SystemZ::CCMASK_CMP_NE)
2806 return SystemZ::CCMASK_TM_SOME_0;
2807 }
2808 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2809 if (CCMask == SystemZ::CCMASK_CMP_GT)
2810 return SystemZ::CCMASK_TM_ALL_1;
2811 if (CCMask == SystemZ::CCMASK_CMP_LE)
2812 return SystemZ::CCMASK_TM_SOME_0;
2813 }
2814 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2815 if (CCMask == SystemZ::CCMASK_CMP_GE)
2816 return SystemZ::CCMASK_TM_ALL_1;
2817 if (CCMask == SystemZ::CCMASK_CMP_LT)
2818 return SystemZ::CCMASK_TM_SOME_0;
2819 }
2820
2821 // Check for ordered comparisons with the top bit.
2822 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2823 if (CCMask == SystemZ::CCMASK_CMP_LE)
2824 return SystemZ::CCMASK_TM_MSB_0;
2825 if (CCMask == SystemZ::CCMASK_CMP_GT)
2826 return SystemZ::CCMASK_TM_MSB_1;
2827 }
2828 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2829 if (CCMask == SystemZ::CCMASK_CMP_LT)
2830 return SystemZ::CCMASK_TM_MSB_0;
2831 if (CCMask == SystemZ::CCMASK_CMP_GE)
2832 return SystemZ::CCMASK_TM_MSB_1;
2833 }
2834
2835 // If there are just two bits, we can do equality checks for Low and High
2836 // as well.
2837 if (Mask == Low + High) {
2838 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2839 return SystemZ::CCMASK_TM_MIXED_MSB_0;
2840 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2841 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2842 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2843 return SystemZ::CCMASK_TM_MIXED_MSB_1;
2844 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2845 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2846 }
2847
2848 // Looks like we've exhausted our options.
2849 return 0;
2850}
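// Illustrative case (annotation, not from the original source): for
// "(x & 0x8000) == 0" the mask satisfies isImmLL, CmpVal is 0 and CCMask is
// CCMASK_CMP_EQ, so the function reports the TEST UNDER MASK condition for
// "all selected bits are zero", which TMLL can evaluate directly.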
2851
2852// See whether C can be implemented as a TEST UNDER MASK instruction.
2853// Update the arguments with the TM version if so.
2854 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2855 Comparison &C) {
2856 // Use VECTOR TEST UNDER MASK for i128 operations.
2857 if (C.Op0.getValueType() == MVT::i128) {
2858 // We can use VTM for EQ/NE comparisons of x & y against 0.
2859 if (C.Op0.getOpcode() == ISD::AND &&
2860 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2861 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
2862 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
2863 if (Mask && Mask->getAPIntValue() == 0) {
2864 C.Opcode = SystemZISD::VTM;
2865 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
2866 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
2867 C.CCValid = SystemZ::CCMASK_VCMP;
2868 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2869 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2870 else
2871 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2872 }
2873 }
2874 return;
2875 }
2876
2877 // Check that we have a comparison with a constant.
2878 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2879 if (!ConstOp1)
2880 return;
2881 uint64_t CmpVal = ConstOp1->getZExtValue();
2882
2883 // Check whether the nonconstant input is an AND with a constant mask.
2884 Comparison NewC(C);
2885 uint64_t MaskVal;
2886 ConstantSDNode *Mask = nullptr;
2887 if (C.Op0.getOpcode() == ISD::AND) {
2888 NewC.Op0 = C.Op0.getOperand(0);
2889 NewC.Op1 = C.Op0.getOperand(1);
2890 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2891 if (!Mask)
2892 return;
2893 MaskVal = Mask->getZExtValue();
2894 } else {
2895 // There is no instruction to compare with a 64-bit immediate
2896 // so use TMHH instead if possible. We need an unsigned ordered
2897 // comparison with an i64 immediate.
2898 if (NewC.Op0.getValueType() != MVT::i64 ||
2899 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2900 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2901 NewC.ICmpType == SystemZICMP::SignedOnly)
2902 return;
2903 // Convert LE and GT comparisons into LT and GE.
2904 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2905 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2906 if (CmpVal == uint64_t(-1))
2907 return;
2908 CmpVal += 1;
2909 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2910 }
2911 // If the low N bits of Op1 are zero then the low N bits of Op0 can
2912 // be masked off without changing the result.
2913 MaskVal = -(CmpVal & -CmpVal);
2914 NewC.ICmpType = SystemZICMP::UnsignedOnly;
2915 }
2916 if (!MaskVal)
2917 return;
2918
2919 // Check whether the combination of mask, comparison value and comparison
2920 // type are suitable.
2921 unsigned BitSize = NewC.Op0.getValueSizeInBits();
2922 unsigned NewCCMask, ShiftVal;
2923 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2924 NewC.Op0.getOpcode() == ISD::SHL &&
2925 isSimpleShift(NewC.Op0, ShiftVal) &&
2926 (MaskVal >> ShiftVal != 0) &&
2927 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2928 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2929 MaskVal >> ShiftVal,
2930 CmpVal >> ShiftVal,
2931 SystemZICMP::Any))) {
2932 NewC.Op0 = NewC.Op0.getOperand(0);
2933 MaskVal >>= ShiftVal;
2934 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2935 NewC.Op0.getOpcode() == ISD::SRL &&
2936 isSimpleShift(NewC.Op0, ShiftVal) &&
2937 (MaskVal << ShiftVal != 0) &&
2938 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2939 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2940 MaskVal << ShiftVal,
2941 CmpVal << ShiftVal,
2942 SystemZICMP::Any))) {
2943 NewC.Op0 = NewC.Op0.getOperand(0);
2944 MaskVal <<= ShiftVal;
2945 } else {
2946 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2947 NewC.ICmpType);
2948 if (!NewCCMask)
2949 return;
2950 }
2951
2952 // Go ahead and make the change.
2953 C.Opcode = SystemZISD::TM;
2954 C.Op0 = NewC.Op0;
2955 if (Mask && Mask->getZExtValue() == MaskVal)
2956 C.Op1 = SDValue(Mask, 0);
2957 else
2958 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2959 C.CCValid = SystemZ::CCMASK_TM;
2960 C.CCMask = NewCCMask;
2961}
2962
2963// Implement i128 comparison in vector registers.
2964static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
2965 Comparison &C) {
2966 if (C.Opcode != SystemZISD::ICMP)
2967 return;
2968 if (C.Op0.getValueType() != MVT::i128)
2969 return;
2970
2971 // (In-)Equality comparisons can be implemented via VCEQGS.
2972 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2973 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2974 C.Opcode = SystemZISD::VICMPES;
2975 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
2976 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
2977 C.CCValid = SystemZ::CCMASK_VCMP;
2978 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2979 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2980 else
2981 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2982 return;
2983 }
2984
2985 // Normalize other comparisons to GT.
2986 bool Swap = false, Invert = false;
2987 switch (C.CCMask) {
2988 case SystemZ::CCMASK_CMP_GT: break;
2989 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
2990 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
2991 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
2992 default: llvm_unreachable("Invalid integer condition!");
2993 }
2994 if (Swap)
2995 std::swap(C.Op0, C.Op1);
2996
2997 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2998 C.Opcode = SystemZISD::UCMP128HI;
2999 else
3000 C.Opcode = SystemZISD::SCMP128HI;
3001 C.CCValid = SystemZ::CCMASK_ANY;
3002 C.CCMask = SystemZ::CCMASK_1;
3003
3004 if (Invert)
3005 C.CCMask ^= C.CCValid;
3006}
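// Illustrative case (annotation, not from the original source): an unsigned
// i128 "a <= b" hits the CCMASK_CMP_LE case above, so Invert is set and the
// operands stay in place; UCMP128HI then computes "a > b", and the final XOR
// with CCValid selects the complementary CC values, i.e. the original "a <= b".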
3007
3008// See whether the comparison argument contains a redundant AND
3009// and remove it if so. This sometimes happens due to the generic
3010// BRCOND expansion.
3011 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
3012 Comparison &C) {
3013 if (C.Op0.getOpcode() != ISD::AND)
3014 return;
3015 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3016 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3017 return;
3018 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3019 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3020 return;
3021
3022 C.Op0 = C.Op0.getOperand(0);
3023}
3024
3025// Return a Comparison that tests the condition-code result of intrinsic
3026// node Call against constant integer CC using comparison code Cond.
3027// Opcode is the opcode of the SystemZISD operation for the intrinsic
3028// and CCValid is the set of possible condition-code results.
3029static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3030 SDValue Call, unsigned CCValid, uint64_t CC,
3031 ISD::CondCode Cond) {
3032 Comparison C(Call, SDValue(), SDValue());
3033 C.Opcode = Opcode;
3034 C.CCValid = CCValid;
3035 if (Cond == ISD::SETEQ)
3036 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3037 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3038 else if (Cond == ISD::SETNE)
3039 // ...and the inverse of that.
3040 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3041 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3042 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3043 // always true for CC>3.
3044 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3045 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3046 // ...and the inverse of that.
3047 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3048 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3049 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3050 // always true for CC>3.
3051 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3052 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3053 // ...and the inverse of that.
3054 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3055 else
3056 llvm_unreachable("Unexpected integer comparison type");
3057 C.CCMask &= CCValid;
3058 return C;
3059}
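// Worked example (illustrative annotation, not part of the original file):
// testing an intrinsic's CC result with Cond == ISD::SETEQ against CC == 2
// yields C.CCMask == 1 << (3 - 2), i.e. SystemZ::CCMASK_2, which is then
// ANDed with CCValid so that CC values the intrinsic cannot produce are
// never tested for.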
3060
3061 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3062static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3063 ISD::CondCode Cond, const SDLoc &DL,
3064 SDValue Chain = SDValue(),
3065 bool IsSignaling = false) {
3066 if (CmpOp1.getOpcode() == ISD::Constant) {
3067 assert(!Chain);
3068 unsigned Opcode, CCValid;
3069 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3070 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3071 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3072 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3073 CmpOp1->getAsZExtVal(), Cond);
3074 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3075 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3076 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3077 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3078 CmpOp1->getAsZExtVal(), Cond);
3079 }
3080 Comparison C(CmpOp0, CmpOp1, Chain);
3081 C.CCMask = CCMaskForCondCode(Cond);
3082 if (C.Op0.getValueType().isFloatingPoint()) {
3083 C.CCValid = SystemZ::CCMASK_FCMP;
3084 if (!C.Chain)
3085 C.Opcode = SystemZISD::FCMP;
3086 else if (!IsSignaling)
3087 C.Opcode = SystemZISD::STRICT_FCMP;
3088 else
3089 C.Opcode = SystemZISD::STRICT_FCMPS;
3090 adjustForFNeg(C);
3091 } else {
3092 assert(!C.Chain);
3093 C.CCValid = SystemZ::CCMASK_ICMP;
3094 C.Opcode = SystemZISD::ICMP;
3095 // Choose the type of comparison. Equality and inequality tests can
3096 // use either signed or unsigned comparisons. The choice also doesn't
3097 // matter if both sign bits are known to be clear. In those cases we
3098 // want to give the main isel code the freedom to choose whichever
3099 // form fits best.
3100 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3101 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3102 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3103 C.ICmpType = SystemZICMP::Any;
3104 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3105 C.ICmpType = SystemZICMP::UnsignedOnly;
3106 else
3107 C.ICmpType = SystemZICMP::SignedOnly;
3108 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3109 adjustForRedundantAnd(DAG, DL, C);
3110 adjustZeroCmp(DAG, DL, C);
3111 adjustSubwordCmp(DAG, DL, C);
3112 adjustForSubtraction(DAG, DL, C);
3113 adjustForLTGFR(C);
3114 adjustICmpTruncate(DAG, DL, C);
3115 }
3116
3117 if (shouldSwapCmpOperands(C)) {
3118 std::swap(C.Op0, C.Op1);
3119 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3120 }
3121
3122 adjustForTestUnderMask(DAG, DL, C);
3123 adjustICmp128(DAG, DL, C);
3124 return C;
3125}
3126
3127// Emit the comparison instruction described by C.
3128static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3129 if (!C.Op1.getNode()) {
3130 SDNode *Node;
3131 switch (C.Op0.getOpcode()) {
3132 case ISD::INTRINSIC_W_CHAIN:
3133 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3134 return SDValue(Node, 0);
3135 case ISD::INTRINSIC_WO_CHAIN:
3136 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3137 return SDValue(Node, Node->getNumValues() - 1);
3138 default:
3139 llvm_unreachable("Invalid comparison operands");
3140 }
3141 }
3142 if (C.Opcode == SystemZISD::ICMP)
3143 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3144 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3145 if (C.Opcode == SystemZISD::TM) {
3146 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3147 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
3148 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3149 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3150 }
3151 if (C.Opcode == SystemZISD::VICMPES) {
3152 SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32);
3153 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3154 return SDValue(Val.getNode(), 1);
3155 }
3156 if (C.Chain) {
3157 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3158 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3159 }
3160 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3161}
3162
3163// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3164// 64 bits. Extend is the extension type to use. Store the high part
3165// in Hi and the low part in Lo.
3166static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3167 SDValue Op0, SDValue Op1, SDValue &Hi,
3168 SDValue &Lo) {
3169 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3170 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3171 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3172 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3173 DAG.getConstant(32, DL, MVT::i64));
3174 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3175 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3176}
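// Worked example (illustrative annotation, not part of the original file):
// with Extend == ISD::ZERO_EXTEND, Op0 == 0x80000000 and Op1 == 4, the 64-bit
// product is 0x200000000, so Hi becomes 2 and Lo becomes 0, exactly the
// UMUL_LOHI result pair for the original 32-bit operands.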
3177
3178// Lower a binary operation that produces two VT results, one in each
3179// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3180// and Opcode performs the GR128 operation. Store the even register result
3181// in Even and the odd register result in Odd.
3182static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3183 unsigned Opcode, SDValue Op0, SDValue Op1,
3184 SDValue &Even, SDValue &Odd) {
3185 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3186 bool Is32Bit = is32Bit(VT);
3187 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3188 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3189}
3190
3191// Return an i32 value that is 1 if the CC value produced by CCReg is
3192// in the mask CCMask and 0 otherwise. CC is known to have a value
3193// in CCValid, so other values can be ignored.
3194static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3195 unsigned CCValid, unsigned CCMask) {
3196 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3197 DAG.getConstant(0, DL, MVT::i32),
3198 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3199 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3200 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3201}
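// In effect (illustrative annotation, not part of the original file) the
// returned SELECT_CCMASK node evaluates to 1 when the CC value produced by
// CCReg is selected by CCMask and to 0 otherwise, which is how scalar SETCC
// results are materialized from the condition code.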
3202
3203 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3204// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3205// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3206// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3207// floating-point comparisons.
3208 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
3209 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
3210 switch (CC) {
3211 case ISD::SETOEQ:
3212 case ISD::SETEQ:
3213 switch (Mode) {
3214 case CmpMode::Int: return SystemZISD::VICMPE;
3215 case CmpMode::FP: return SystemZISD::VFCMPE;
3216 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3217 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3218 }
3219 llvm_unreachable("Bad mode");
3220
3221 case ISD::SETOGE:
3222 case ISD::SETGE:
3223 switch (Mode) {
3224 case CmpMode::Int: return 0;
3225 case CmpMode::FP: return SystemZISD::VFCMPHE;
3226 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3227 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3228 }
3229 llvm_unreachable("Bad mode");
3230
3231 case ISD::SETOGT:
3232 case ISD::SETGT:
3233 switch (Mode) {
3234 case CmpMode::Int: return SystemZISD::VICMPH;
3235 case CmpMode::FP: return SystemZISD::VFCMPH;
3236 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3237 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3238 }
3239 llvm_unreachable("Bad mode");
3240
3241 case ISD::SETUGT:
3242 switch (Mode) {
3243 case CmpMode::Int: return SystemZISD::VICMPHL;
3244 case CmpMode::FP: return 0;
3245 case CmpMode::StrictFP: return 0;
3246 case CmpMode::SignalingFP: return 0;
3247 }
3248 llvm_unreachable("Bad mode");
3249
3250 default:
3251 return 0;
3252 }
3253}
3254
3255// Return the SystemZISD vector comparison operation for CC or its inverse,
3256// or 0 if neither can be done directly. Indicate in Invert whether the
3257// result is for the inverse of CC. Mode is as above.
3258 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3259 bool &Invert) {
3260 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3261 Invert = false;
3262 return Opcode;
3263 }
3264
3265 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3266 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3267 Invert = true;
3268 return Opcode;
3269 }
3270
3271 return 0;
3272}
3273
3274// Return a v2f64 that contains the extended form of elements Start and Start+1
3275// of v4f32 value Op. If Chain is nonnull, return the strict form.
3276static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3277 SDValue Op, SDValue Chain) {
3278 int Mask[] = { Start, -1, Start + 1, -1 };
3279 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3280 if (Chain) {
3281 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3282 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3283 }
3284 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3285}
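// Illustrative case (annotation, not from the original source): with
// Start == 2 the shuffle mask { 2, -1, 3, -1 } moves elements 2 and 3 of the
// v4f32 into the even lanes, and VEXTEND then widens those two lanes into the
// two elements of the resulting v2f64.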
3286
3287// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3288// producing a result of type VT. If Chain is nonnull, return the strict form.
3289SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3290 const SDLoc &DL, EVT VT,
3291 SDValue CmpOp0,
3292 SDValue CmpOp1,
3293 SDValue Chain) const {
3294 // There is no hardware support for v4f32 (unless we have the vector
3295 // enhancements facility 1), so extend the vector into two v2f64s
3296 // and compare those.
3297 if (CmpOp0.getValueType() == MVT::v4f32 &&
3298 !Subtarget.hasVectorEnhancements1()) {
3299 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3300 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3301 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3302 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3303 if (Chain) {
3304 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3305 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3306 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3307 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3308 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3309 H1.getValue(1), L1.getValue(1),
3310 HRes.getValue(1), LRes.getValue(1) };
3311 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3312 SDValue Ops[2] = { Res, NewChain };
3313 return DAG.getMergeValues(Ops, DL);
3314 }
3315 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3316 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3317 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3318 }
3319 if (Chain) {
3320 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3321 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3322 }
3323 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3324}
3325
3326// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3327// an integer mask of type VT. If Chain is nonnull, we have a strict
3328// floating-point comparison. If in addition IsSignaling is true, we have
3329// a strict signaling floating-point comparison.
3330SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3331 const SDLoc &DL, EVT VT,
3332 ISD::CondCode CC,
3333 SDValue CmpOp0,
3334 SDValue CmpOp1,
3335 SDValue Chain,
3336 bool IsSignaling) const {
3337 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3338 assert (!Chain || IsFP);
3339 assert (!IsSignaling || Chain);
3340 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3341 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3342 bool Invert = false;
3343 SDValue Cmp;
3344 switch (CC) {
3345 // Handle tests for order using (or (ogt y x) (oge x y)).
3346 case ISD::SETUO:
3347 Invert = true;
3348 [[fallthrough]];
3349 case ISD::SETO: {
3350 assert(IsFP && "Unexpected integer comparison");
3351 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3352 DL, VT, CmpOp1, CmpOp0, Chain);
3353 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3354 DL, VT, CmpOp0, CmpOp1, Chain);
3355 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3356 if (Chain)
3357 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3358 LT.getValue(1), GE.getValue(1));
3359 break;
3360 }
3361
3362 // Handle <> tests using (or (ogt y x) (ogt x y)).
3363 case ISD::SETUEQ:
3364 Invert = true;
3365 [[fallthrough]];
3366 case ISD::SETONE: {
3367 assert(IsFP && "Unexpected integer comparison");
3368 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3369 DL, VT, CmpOp1, CmpOp0, Chain);
3370 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3371 DL, VT, CmpOp0, CmpOp1, Chain);
3372 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3373 if (Chain)
3374 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3375 LT.getValue(1), GT.getValue(1));
3376 break;
3377 }
3378
3379 // Otherwise a single comparison is enough. It doesn't really
3380 // matter whether we try the inversion or the swap first, since
3381 // there are no cases where both work.
3382 default:
3383 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3384 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3385 else {
3386 CC = ISD::getSetCCSwappedOperands(CC);
3387 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3388 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3389 else
3390 llvm_unreachable("Unhandled comparison");
3391 }
3392 if (Chain)
3393 Chain = Cmp.getValue(1);
3394 break;
3395 }
3396 if (Invert) {
3397 SDValue Mask =
3398 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3399 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3400 }
3401 if (Chain && Chain.getNode() != Cmp.getNode()) {
3402 SDValue Ops[2] = { Cmp, Chain };
3403 Cmp = DAG.getMergeValues(Ops, DL);
3404 }
3405 return Cmp;
3406}
3407
3408SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3409 SelectionDAG &DAG) const {
3410 SDValue CmpOp0 = Op.getOperand(0);
3411 SDValue CmpOp1 = Op.getOperand(1);
3412 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3413 SDLoc DL(Op);
3414 EVT VT = Op.getValueType();
3415 if (VT.isVector())
3416 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3417
3418 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3419 SDValue CCReg = emitCmp(DAG, DL, C);
3420 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3421}
3422
3423SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3424 SelectionDAG &DAG,
3425 bool IsSignaling) const {
3426 SDValue Chain = Op.getOperand(0);
3427 SDValue CmpOp0 = Op.getOperand(1);
3428 SDValue CmpOp1 = Op.getOperand(2);
3429 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3430 SDLoc DL(Op);
3431 EVT VT = Op.getNode()->getValueType(0);
3432 if (VT.isVector()) {
3433 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3434 Chain, IsSignaling);
3435 return Res.getValue(Op.getResNo());
3436 }
3437
3438 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3439 SDValue CCReg = emitCmp(DAG, DL, C);
3440 CCReg->setFlags(Op->getFlags());
3441 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3442 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3443 return DAG.getMergeValues(Ops, DL);
3444}
3445
3446SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3447 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3448 SDValue CmpOp0 = Op.getOperand(2);
3449 SDValue CmpOp1 = Op.getOperand(3);
3450 SDValue Dest = Op.getOperand(4);
3451 SDLoc DL(Op);
3452
3453 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3454 SDValue CCReg = emitCmp(DAG, DL, C);
3455 return DAG.getNode(
3456 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3457 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3458 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3459}
3460
3461// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3462// allowing Pos and Neg to be wider than CmpOp.
3463static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3464 return (Neg.getOpcode() == ISD::SUB &&
3465 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3466 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3467 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3468 Pos.getOperand(0) == CmpOp)));
3469}
3470
3471// Return the absolute or negative absolute of Op; IsNegative decides which.
3472 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3473 bool IsNegative) {
3474 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3475 if (IsNegative)
3476 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3477 DAG.getConstant(0, DL, Op.getValueType()), Op);
3478 return Op;
3479}
3480
3481SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3482 SelectionDAG &DAG) const {
3483 SDValue CmpOp0 = Op.getOperand(0);
3484 SDValue CmpOp1 = Op.getOperand(1);
3485 SDValue TrueOp = Op.getOperand(2);
3486 SDValue FalseOp = Op.getOperand(3);
3487 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3488 SDLoc DL(Op);
3489
3490 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3491
3492 // Check for absolute and negative-absolute selections, including those
3493 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3494 // This check supplements the one in DAGCombiner.
3495 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3496 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3497 C.Op1.getOpcode() == ISD::Constant &&
3498 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3499 C.Op1->getAsZExtVal() == 0) {
3500 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3501 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3502 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3503 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3504 }
3505
3506 SDValue CCReg = emitCmp(DAG, DL, C);
3507 SDValue Ops[] = {TrueOp, FalseOp,
3508 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3509 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3510
3511 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3512}
3513
3514SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3515 SelectionDAG &DAG) const {
3516 SDLoc DL(Node);
3517 const GlobalValue *GV = Node->getGlobal();
3518 int64_t Offset = Node->getOffset();
3519 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3520 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3521
3522 SDValue Result;
3523 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3524 if (isInt<32>(Offset)) {
3525 // Assign anchors at 1<<12 byte boundaries.
3526 uint64_t Anchor = Offset & ~uint64_t(0xfff);
3527 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3528 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3529
3530 // The offset can be folded into the address if it is aligned to a
3531 // halfword.
3532 Offset -= Anchor;
3533 if (Offset != 0 && (Offset & 1) == 0) {
3534 SDValue Full =
3535 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3536 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3537 Offset = 0;
3538 }
3539 } else {
3540 // Conservatively load a constant offset greater than 32 bits into a
3541 // register below.
3542 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3543 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3544 }
3545 } else if (Subtarget.isTargetELF()) {
3546 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3547 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3548 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3549 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3550 } else if (Subtarget.isTargetzOS()) {
3551 Result = getADAEntry(DAG, GV, DL, PtrVT);
3552 } else
3553 llvm_unreachable("Unexpected Subtarget");
3554
3555 // If there was a non-zero offset that we didn't fold, create an explicit
3556 // addition for it.
3557 if (Offset != 0)
3558 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3559 DAG.getConstant(Offset, DL, PtrVT));
3560
3561 return Result;
3562}
3563
3564SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3565 SelectionDAG &DAG,
3566 unsigned Opcode,
3567 SDValue GOTOffset) const {
3568 SDLoc DL(Node);
3569 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3570 SDValue Chain = DAG.getEntryNode();
3571 SDValue Glue;
3572
3573 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3574 CallingConv::GHC)
3575 report_fatal_error("In GHC calling convention TLS is not supported");
3576
3577 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3578 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3579 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3580 Glue = Chain.getValue(1);
3581 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3582 Glue = Chain.getValue(1);
3583
3584 // The first call operand is the chain and the second is the TLS symbol.
3585 SmallVector<SDValue, 8> Ops;
3586 Ops.push_back(Chain);
3587 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3588 Node->getValueType(0),
3589 0, 0));
3590
3591 // Add argument registers to the end of the list so that they are
3592 // known live into the call.
3593 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3594 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3595
3596 // Add a register mask operand representing the call-preserved registers.
3597 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3598 const uint32_t *Mask =
3599 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3600 assert(Mask && "Missing call preserved mask for calling convention");
3601 Ops.push_back(DAG.getRegisterMask(Mask));
3602
3603 // Glue the call to the argument copies.
3604 Ops.push_back(Glue);
3605
3606 // Emit the call.
3607 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3608 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3609 Glue = Chain.getValue(1);
3610
3611 // Copy the return value from %r2.
3612 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3613}
3614
3615SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3616 SelectionDAG &DAG) const {
3617 SDValue Chain = DAG.getEntryNode();
3618 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3619
3620 // The high part of the thread pointer is in access register 0.
3621 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3622 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3623
3624 // The low part of the thread pointer is in access register 1.
3625 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3626 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3627
3628 // Merge them into a single 64-bit address.
3629 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3630 DAG.getConstant(32, DL, PtrVT));
3631 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3632}
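// In effect the 64-bit thread pointer is TP = (a0 << 32) | a1, assembled from
// access registers a0 (high word) and a1 (low word) as read above.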
3633
3634SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3635 SelectionDAG &DAG) const {
3636 if (DAG.getTarget().useEmulatedTLS())
3637 return LowerToTLSEmulatedModel(Node, DAG);
3638 SDLoc DL(Node);
3639 const GlobalValue *GV = Node->getGlobal();
3640 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3641 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3642
3643 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3644 CallingConv::GHC)
3645 report_fatal_error("In GHC calling convention TLS is not supported");
3646
3647 SDValue TP = lowerThreadPointer(DL, DAG);
3648
3649 // Get the offset of GA from the thread pointer, based on the TLS model.
3650 SDValue Offset;
3651 switch (model) {
3652 case TLSModel::GeneralDynamic: {
3653 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3654 SystemZConstantPoolValue *CPV =
3655 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3656
3657 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3658 Offset = DAG.getLoad(
3659 PtrVT, DL, DAG.getEntryNode(), Offset,
3660 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3661
3662 // Call __tls_get_offset to retrieve the offset.
3663 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3664 break;
3665 }
3666
3667 case TLSModel::LocalDynamic: {
3668 // Load the GOT offset of the module ID.
3669 SystemZConstantPoolValue *CPV =
3670 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3671
3672 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3673 Offset = DAG.getLoad(
3674 PtrVT, DL, DAG.getEntryNode(), Offset,
3675 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3676
3677 // Call __tls_get_offset to retrieve the module base offset.
3678 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3679
3680 // Note: The SystemZLDCleanupPass will remove redundant computations
3681 // of the module base offset. Count total number of local-dynamic
3682 // accesses to trigger execution of that pass.
3683 SystemZMachineFunctionInfo* MFI =
3684 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3685 MFI->incNumLocalDynamicTLSAccesses();
3686
3687 // Add the per-symbol offset.
3688 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3689
3690 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3691 DTPOffset = DAG.getLoad(
3692 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3693 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3694
3695 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3696 break;
3697 }
3698
3699 case TLSModel::InitialExec: {
3700 // Load the offset from the GOT.
3701 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3702 SystemZII::MO_INDNTPOFF);
3703 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3704 Offset =
3705 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3706 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3707 break;
3708 }
3709
3710 case TLSModel::LocalExec: {
3711 // Force the offset into the constant pool and load it from there.
3712 SystemZConstantPoolValue *CPV =
3713 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3714
3715 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3716 Offset = DAG.getLoad(
3717 PtrVT, DL, DAG.getEntryNode(), Offset,
3718 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3719 break;
3720 }
3721 }
3722
3723 // Add the base and offset together.
3724 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3725}
3726
3727SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3728 SelectionDAG &DAG) const {
3729 SDLoc DL(Node);
3730 const BlockAddress *BA = Node->getBlockAddress();
3731 int64_t Offset = Node->getOffset();
3732 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3733
3734 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3735 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3736 return Result;
3737}
3738
3739SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3740 SelectionDAG &DAG) const {
3741 SDLoc DL(JT);
3742 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3743 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3744
3745 // Use LARL to load the address of the table.
3746 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3747}
3748
3749SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3750 SelectionDAG &DAG) const {
3751 SDLoc DL(CP);
3752 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3753
3754 SDValue Result;
3755 if (CP->isMachineConstantPoolEntry())
3756 Result =
3757 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3758 else
3759 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3760 CP->getOffset());
3761
3762 // Use LARL to load the address of the constant pool entry.
3763 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3764}
3765
3766SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3767 SelectionDAG &DAG) const {
3768 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
3769 MachineFunction &MF = DAG.getMachineFunction();
3770 MachineFrameInfo &MFI = MF.getFrameInfo();
3771 MFI.setFrameAddressIsTaken(true);
3772
3773 SDLoc DL(Op);
3774 unsigned Depth = Op.getConstantOperandVal(0);
3775 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3776
3777 // By definition, the frame address is the address of the back chain. (In
3778 // the case of packed stack without backchain, return the address where the
3779 // backchain would have been stored. This will either be an unused space or
3780 // contain a saved register).
3781 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3782 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3783
3784 if (Depth > 0) {
3785 // FIXME The frontend should detect this case.
3786 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3787 report_fatal_error("Unsupported stack frame traversal count");
3788
3789 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
3790 while (Depth--) {
3791 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
3792 MachinePointerInfo());
3793 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
3794 }
3795 }
3796
3797 return BackChain;
3798}
3799
3800SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3801 SelectionDAG &DAG) const {
3802 MachineFunction &MF = DAG.getMachineFunction();
3803 MachineFrameInfo &MFI = MF.getFrameInfo();
3804 MFI.setReturnAddressIsTaken(true);
3805
3806 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3807 return SDValue();
3808
3809 SDLoc DL(Op);
3810 unsigned Depth = Op.getConstantOperandVal(0);
3811 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3812
3813 if (Depth > 0) {
3814 // FIXME The frontend should detect this case.
3815 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3816 report_fatal_error("Unsupported stack frame traversal count");
3817
3818 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3819 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
3820 int Offset = TFL->getReturnAddressOffset(MF);
3821 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
3822 DAG.getConstant(Offset, DL, PtrVT));
3823 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
3824 MachinePointerInfo());
3825 }
3826
3827 // Return R14D (ELF) / R7D (XPLINK), which has the return address. Mark it an
3828 // implicit live-in.
3829 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
3830 Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(),
3831 &SystemZ::GR64BitRegClass);
3832 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3833}
3834
3835SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3836 SelectionDAG &DAG) const {
3837 SDLoc DL(Op);
3838 SDValue In = Op.getOperand(0);
3839 EVT InVT = In.getValueType();
3840 EVT ResVT = Op.getValueType();
3841
3842 // Convert loads directly. This is normally done by DAGCombiner,
3843 // but we need this case for bitcasts that are created during lowering
3844 // and which are then lowered themselves.
3845 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3846 if (ISD::isNormalLoad(LoadN)) {
3847 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3848 LoadN->getBasePtr(), LoadN->getMemOperand());
3849 // Update the chain uses.
3850 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3851 return NewLoad;
3852 }
3853
3854 if (InVT == MVT::i32 && ResVT == MVT::f32) {
3855 SDValue In64;
3856 if (Subtarget.hasHighWord()) {
3857 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3858 MVT::i64);
3859 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3860 MVT::i64, SDValue(U64, 0), In);
3861 } else {
3862 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3863 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3864 DAG.getConstant(32, DL, MVT::i64));
3865 }
3866 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3867 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3868 DL, MVT::f32, Out64);
3869 }
3870 if (InVT == MVT::f32 && ResVT == MVT::i32) {
3871 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3872 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3873 MVT::f64, SDValue(U64, 0), In);
3874 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3875 if (Subtarget.hasHighWord())
3876 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3877 MVT::i32, Out64);
3878 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3879 DAG.getConstant(32, DL, MVT::i64));
3880 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3881 }
3882 llvm_unreachable("Unexpected bitcast combination");
3883}
3884
3885SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3886 SelectionDAG &DAG) const {
3887
3888 if (Subtarget.isTargetXPLINK64())
3889 return lowerVASTART_XPLINK(Op, DAG);
3890 else
3891 return lowerVASTART_ELF(Op, DAG);
3892}
3893
3894SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
3895 SelectionDAG &DAG) const {
3896 MachineFunction &MF = DAG.getMachineFunction();
3897 SystemZMachineFunctionInfo *FuncInfo =
3898 MF.getInfo<SystemZMachineFunctionInfo>();
3899
3900 SDLoc DL(Op);
3901
3902 // vastart just stores the address of the VarArgsFrameIndex slot into the
3903 // memory location argument.
3904 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3905 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3906 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3907 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
3908 MachinePointerInfo(SV));
3909}
3910
3911SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
3912 SelectionDAG &DAG) const {
3913 MachineFunction &MF = DAG.getMachineFunction();
3914 SystemZMachineFunctionInfo *FuncInfo =
3915 MF.getInfo<SystemZMachineFunctionInfo>();
3916 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3917
3918 SDValue Chain = Op.getOperand(0);
3919 SDValue Addr = Op.getOperand(1);
3920 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3921 SDLoc DL(Op);
3922
3923 // The initial values of each field.
3924 const unsigned NumFields = 4;
3925 SDValue Fields[NumFields] = {
3926 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3927 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3928 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3929 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3930 };
3931
3932 // Store each field into its respective slot.
3933 SDValue MemOps[NumFields];
3934 unsigned Offset = 0;
3935 for (unsigned I = 0; I < NumFields; ++I) {
3936 SDValue FieldAddr = Addr;
3937 if (Offset != 0)
3938 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3939 DAG.getIntPtrConstant(Offset, DL));
3940 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3941 MachinePointerInfo(SV, Offset));
3942 Offset += 8;
3943 }
3944 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3945}
3946
3947SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3948 SelectionDAG &DAG) const {
3949 SDValue Chain = Op.getOperand(0);
3950 SDValue DstPtr = Op.getOperand(1);
3951 SDValue SrcPtr = Op.getOperand(2);
3952 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3953 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3954 SDLoc DL(Op);
3955
3956 uint32_t Sz =
3957 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
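 // The ELF va_list consists of the four 8-byte fields written by
 // lowerVASTART_ELF above (gpr/fpr counts plus two save-area pointers),
 // hence the 32-byte copy; XPLINK uses a single pointer-sized slot.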
3958 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
3959 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3960 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV),
3961 MachinePointerInfo(SrcSV));
3962}
3963
3964SDValue
3965SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
3966 SelectionDAG &DAG) const {
3967 if (Subtarget.isTargetXPLINK64())
3968 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
3969 else
3970 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
3971}
3972
3973SDValue
3974SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
3975 SelectionDAG &DAG) const {
3976 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3977 MachineFunction &MF = DAG.getMachineFunction();
3978 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3979 SDValue Chain = Op.getOperand(0);
3980 SDValue Size = Op.getOperand(1);
3981 SDValue Align = Op.getOperand(2);
3982 SDLoc DL(Op);
3983
3984 // If the user has set the no-realign-stack function attribute, ignore
3985 // alloca alignments.
3986 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
3987
3988 uint64_t StackAlign = TFI->getStackAlignment();
3989 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3990 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3991
3992 SDValue NeededSpace = Size;
3993
3994 // Add extra space for alignment if needed.
3995 EVT PtrVT = getPointerTy(MF.getDataLayout());
3996 if (ExtraAlignSpace)
3997 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
3998 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3999
4000 bool IsSigned = false;
4001 bool DoesNotReturn = false;
4002 bool IsReturnValueUsed = false;
4003 EVT VT = Op.getValueType();
4004 SDValue AllocaCall =
4005 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4006 CallingConv::C, IsSigned, DL, DoesNotReturn,
4007 IsReturnValueUsed)
4008 .first;
4009
4010 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4011 // to end of call in order to ensure it isn't broken up from the call
4012 // sequence.
4013 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4014 Register SPReg = Regs.getStackPointerRegister();
4015 Chain = AllocaCall.getValue(1);
4016 SDValue Glue = AllocaCall.getValue(2);
4017 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4018 Chain = NewSPRegNode.getValue(1);
4019
4020 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4021 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4022 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4023
4024 // Dynamically realign if needed.
4025 if (ExtraAlignSpace) {
4026 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4027 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4028 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4029 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4030 }
4031
4032 SDValue Ops[2] = {Result, Chain};
4033 return DAG.getMergeValues(Ops, DL);
4034}
4035
4036SDValue
4037SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4038 SelectionDAG &DAG) const {
4039 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4040 MachineFunction &MF = DAG.getMachineFunction();
4041 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4042 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4043
4044 SDValue Chain = Op.getOperand(0);
4045 SDValue Size = Op.getOperand(1);
4046 SDValue Align = Op.getOperand(2);
4047 SDLoc DL(Op);
4048
4049 // If the user has set the no-realign-stack function attribute, ignore
4050 // alloca alignments.
4051 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4052
4053 uint64_t StackAlign = TFI->getStackAlignment();
4054 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4055 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4056
4058 SDValue NeededSpace = Size;
4059
4060 // Get a reference to the stack pointer.
4061 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4062
4063 // If we need a backchain, save it now.
4064 SDValue Backchain;
4065 if (StoreBackchain)
4066 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4067 MachinePointerInfo());
4068
4069 // Add extra space for alignment if needed.
4070 if (ExtraAlignSpace)
4071 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4072 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4073
4074 // Get the new stack pointer value.
4075 SDValue NewSP;
4076 if (hasInlineStackProbe(MF)) {
4077 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4078 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4079 Chain = NewSP.getValue(1);
4080 }
4081 else {
4082 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4083 // Copy the new stack pointer back.
4084 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4085 }
4086
4087 // The allocated data lives above the 160 bytes allocated for the standard
4088 // frame, plus any outgoing stack arguments. We don't know how much that
4089 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4090 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4091 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4092
4093 // Dynamically realign if needed.
4094 if (RequiredAlign > StackAlign) {
4095 Result =
4096 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4097 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4098 Result =
4099 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4100 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4101 }
4102
4103 if (StoreBackchain)
4104 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4105 MachinePointerInfo());
4106
4107 SDValue Ops[2] = { Result, Chain };
4108 return DAG.getMergeValues(Ops, DL);
4109}
4110
4111SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4112 SDValue Op, SelectionDAG &DAG) const {
4113 SDLoc DL(Op);
4114
4115 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4116}
4117
4118SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4119 SelectionDAG &DAG) const {
4120 EVT VT = Op.getValueType();
4121 SDLoc DL(Op);
4122 SDValue Ops[2];
4123 if (is32Bit(VT))
4124 // Just do a normal 64-bit multiplication and extract the results.
4125 // We define this so that it can be used for constant division.
4126 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4127 Op.getOperand(1), Ops[1], Ops[0]);
4128 else if (Subtarget.hasMiscellaneousExtensions2())
4129 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4130 // the high result in the even register. ISD::SMUL_LOHI is defined to
4131 // return the low half first, so the results are in reverse order.
4132 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4133 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4134 else {
4135 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4136 //
4137 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4138 //
4139 // but using the fact that the upper halves are either all zeros
4140 // or all ones:
4141 //
4142 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4143 //
4144 // and grouping the right terms together since they are quicker than the
4145 // multiplication:
4146 //
4147 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
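 //
 // The rewrite is valid because lh and rh are each either 0 or all ones:
 // when lh is all ones, lh * rl == -rl == -(lh & rl), and similarly for
 // ll * rh, so the shifted products can be replaced by the cheaper ANDs.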
4148 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4149 SDValue LL = Op.getOperand(0);
4150 SDValue RL = Op.getOperand(1);
4151 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4152 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4153 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4154 // the high result in the even register. ISD::SMUL_LOHI is defined to
4155 // return the low half first, so the results are in reverse order.
4156 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4157 LL, RL, Ops[1], Ops[0]);
4158 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4159 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4160 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4161 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4162 }
4163 return DAG.getMergeValues(Ops, DL);
4164}
4165
4166SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4167 SelectionDAG &DAG) const {
4168 EVT VT = Op.getValueType();
4169 SDLoc DL(Op);
4170 SDValue Ops[2];
4171 if (is32Bit(VT))
4172 // Just do a normal 64-bit multiplication and extract the results.
4173 // We define this so that it can be used for constant division.
4174 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4175 Op.getOperand(1), Ops[1], Ops[0]);
4176 else
4177 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4178 // the high result in the even register. ISD::UMUL_LOHI is defined to
4179 // return the low half first, so the results are in reverse order.
4180 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4181 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4182 return DAG.getMergeValues(Ops, DL);
4183}
4184
4185SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4186 SelectionDAG &DAG) const {
4187 SDValue Op0 = Op.getOperand(0);
4188 SDValue Op1 = Op.getOperand(1);
4189 EVT VT = Op.getValueType();
4190 SDLoc DL(Op);
4191
4192 // We use DSGF for 32-bit division. This means the first operand must
4193 // always be 64-bit, and the second operand should be 32-bit whenever
4194 // that is possible, to improve performance.
4195 if (is32Bit(VT))
4196 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4197 else if (DAG.ComputeNumSignBits(Op1) > 32)
4198 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4199
4200 // DSG(F) returns the remainder in the even register and the
4201 // quotient in the odd register.
4202 SDValue Ops[2];
4203 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4204 return DAG.getMergeValues(Ops, DL);
4205}
4206
4207SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4208 SelectionDAG &DAG) const {
4209 EVT VT = Op.getValueType();
4210 SDLoc DL(Op);
4211
4212 // DL(G) returns the remainder in the even register and the
4213 // quotient in the odd register.
4214 SDValue Ops[2];
4215 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4216 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4217 return DAG.getMergeValues(Ops, DL);
4218}
4219
4220SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4221 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4222
4223 // Get the known-zero masks for each operand.
4224 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4225 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4226 DAG.computeKnownBits(Ops[1])};
4227
4228 // See if the upper 32 bits of one operand and the lower 32 bits of the
4229 // other are known zero. They are the low and high operands respectively.
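 // For instance, with Op0 = (shl X, 32) and Op1 = (zext i32 Y to i64), the
 // shift is the high operand and the zero-extension is the low operand, and
 // the OR becomes a single insertion of Y into the low word of the shifted X.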
4230 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4231 Known[1].Zero.getZExtValue() };
4232 unsigned High, Low;
4233 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4234 High = 1, Low = 0;
4235 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4236 High = 0, Low = 1;
4237 else
4238 return Op;
4239
4240 SDValue LowOp = Ops[Low];
4241 SDValue HighOp = Ops[High];
4242
4243 // If the high part is a constant, we're better off using IILH.
4244 if (HighOp.getOpcode() == ISD::Constant)
4245 return Op;
4246
4247 // If the low part is a constant that is outside the range of LHI,
4248 // then we're better off using IILF.
4249 if (LowOp.getOpcode() == ISD::Constant) {
4250 int64_t Value = int32_t(LowOp->getAsZExtVal());
4251 if (!isInt<16>(Value))
4252 return Op;
4253 }
4254
4255 // Check whether the high part is an AND that doesn't change the
4256 // high 32 bits and just masks out low bits. We can skip it if so.
4257 if (HighOp.getOpcode() == ISD::AND &&
4258 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4259 SDValue HighOp0 = HighOp.getOperand(0);
4260 uint64_t Mask = HighOp.getConstantOperandVal(1);
4261 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4262 HighOp = HighOp0;
4263 }
4264
4265 // Take advantage of the fact that all GR32 operations only change the
4266 // low 32 bits by truncating Low to an i32 and inserting it directly
4267 // using a subreg. The interesting cases are those where the truncation
4268 // can be folded.
4269 SDLoc DL(Op);
4270 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4271 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4272 MVT::i64, HighOp, Low32);
4273}
4274
4275// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4276SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4277 SelectionDAG &DAG) const {
4278 SDNode *N = Op.getNode();
4279 SDValue LHS = N->getOperand(0);
4280 SDValue RHS = N->getOperand(1);
4281 SDLoc DL(N);
4282
4283 if (N->getValueType(0) == MVT::i128) {
4284 unsigned BaseOp = 0;
4285 unsigned FlagOp = 0;
4286 bool IsBorrow = false;
4287 switch (Op.getOpcode()) {
4288 default: llvm_unreachable("Unknown instruction!");
4289 case ISD::UADDO:
4290 BaseOp = ISD::ADD;
4291 FlagOp = SystemZISD::VACC;
4292 break;
4293 case ISD::USUBO:
4294 BaseOp = ISD::SUB;
4295 FlagOp = SystemZISD::VSCBI;
4296 IsBorrow = true;
4297 break;
4298 }
4299 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4300 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4301 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4302 DAG.getValueType(MVT::i1));
4303 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4304 if (IsBorrow)
4305 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4306 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4307 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4308 }
4309
4310 unsigned BaseOp = 0;
4311 unsigned CCValid = 0;
4312 unsigned CCMask = 0;
4313
4314 switch (Op.getOpcode()) {
4315 default: llvm_unreachable("Unknown instruction!");
4316 case ISD::SADDO:
4317 BaseOp = SystemZISD::SADDO;
4318 CCValid = SystemZ::CCMASK_ARITH;
4319 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4320 break;
4321 case ISD::SSUBO:
4322 BaseOp = SystemZISD::SSUBO;
4323 CCValid = SystemZ::CCMASK_ARITH;
4324 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4325 break;
4326 case ISD::UADDO:
4327 BaseOp = SystemZISD::UADDO;
4328 CCValid = SystemZ::CCMASK_LOGICAL;
4329 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4330 break;
4331 case ISD::USUBO:
4332 BaseOp = SystemZISD::USUBO;
4333 CCValid = SystemZ::CCMASK_LOGICAL;
4334 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4335 break;
4336 }
4337
4338 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4339 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4340
4341 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4342 if (N->getValueType(1) == MVT::i1)
4343 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4344
4345 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4346}
4347
4348static bool isAddCarryChain(SDValue Carry) {
4349 while (Carry.getOpcode() == ISD::UADDO_CARRY)
4350 Carry = Carry.getOperand(2);
4351 return Carry.getOpcode() == ISD::UADDO;
4352}
4353
4354static bool isSubBorrowChain(SDValue Carry) {
4355 while (Carry.getOpcode() == ISD::USUBO_CARRY)
4356 Carry = Carry.getOperand(2);
4357 return Carry.getOpcode() == ISD::USUBO;
4358}
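// lowerUADDSUBO_CARRY below only forms ADDCARRY/SUBCARRY when the incoming
// carry is itself produced by such a UADDO/USUBO(-CARRY) chain, so the carry
// can be taken directly from CC; any other producer falls back to the
// generic expansion by returning an empty SDValue.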
4359
4360// Lower UADDO_CARRY/USUBO_CARRY nodes.
4361SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4362 SelectionDAG &DAG) const {
4363
4364 SDNode *N = Op.getNode();
4365 MVT VT = N->getSimpleValueType(0);
4366
4367 // Let legalize expand this if it isn't a legal type yet.
4368 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4369 return SDValue();
4370
4371 SDValue LHS = N->getOperand(0);
4372 SDValue RHS = N->getOperand(1);
4373 SDValue Carry = Op.getOperand(2);
4374 SDLoc DL(N);
4375
4376 if (VT == MVT::i128) {
4377 unsigned BaseOp = 0;
4378 unsigned FlagOp = 0;
4379 bool IsBorrow = false;
4380 switch (Op.getOpcode()) {
4381 default: llvm_unreachable("Unknown instruction!");
4382 case ISD::UADDO_CARRY:
4383 BaseOp = SystemZISD::VAC;
4384 FlagOp = SystemZISD::VACCC;
4385 break;
4386 case ISD::USUBO_CARRY:
4387 BaseOp = SystemZISD::VSBI;
4388 FlagOp = SystemZISD::VSBCBI;
4389 IsBorrow = true;
4390 break;
4391 }
4392 if (IsBorrow)
4393 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4394 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4395 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4396 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4397 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4398 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4399 DAG.getValueType(MVT::i1));
4400 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4401 if (IsBorrow)
4402 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4403 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4404 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4405 }
4406
4407 unsigned BaseOp = 0;
4408 unsigned CCValid = 0;
4409 unsigned CCMask = 0;
4410
4411 switch (Op.getOpcode()) {
4412 default: llvm_unreachable("Unknown instruction!");
4413 case ISD::UADDO_CARRY:
4414 if (!isAddCarryChain(Carry))
4415 return SDValue();
4416
4417 BaseOp = SystemZISD::ADDCARRY;
4418 CCValid = SystemZ::CCMASK_LOGICAL;
4419 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4420 break;
4421 case ISD::USUBO_CARRY:
4422 if (!isSubBorrowChain(Carry))
4423 return SDValue();
4424
4425 BaseOp = SystemZISD::SUBCARRY;
4426 CCValid = SystemZ::CCMASK_LOGICAL;
4427 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4428 break;
4429 }
4430
4431 // Set the condition code from the carry flag.
4432 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4433 DAG.getConstant(CCValid, DL, MVT::i32),
4434 DAG.getConstant(CCMask, DL, MVT::i32));
4435
4436 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4437 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4438
4439 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4440 if (N->getValueType(1) == MVT::i1)
4441 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4442
4443 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4444}
4445
4446SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4447 SelectionDAG &DAG) const {
4448 EVT VT = Op.getValueType();
4449 SDLoc DL(Op);
4450 Op = Op.getOperand(0);
4451
4452 if (VT.getScalarSizeInBits() == 128) {
4453 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4454 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4455 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4456 DAG.getConstant(0, DL, MVT::i64));
4457 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4458 return Op;
4459 }
4460
4461 // Handle vector types via VPOPCT.
4462 if (VT.isVector()) {
4463 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4464 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4465 switch (VT.getScalarSizeInBits()) {
4466 case 8:
4467 break;
4468 case 16: {
4469 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4470 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4471 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4472 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4473 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4474 break;
4475 }
4476 case 32: {
4477 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4478 DAG.getConstant(0, DL, MVT::i32));
4479 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4480 break;
4481 }
4482 case 64: {
4483 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4484 DAG.getConstant(0, DL, MVT::i32));
4485 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4486 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4487 break;
4488 }
4489 default:
4490 llvm_unreachable("Unexpected type");
4491 }
4492 return Op;
4493 }
4494
4495 // Get the known-zero mask for the operand.
4496 KnownBits Known = DAG.computeKnownBits(Op);
4497 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
4498 if (NumSignificantBits == 0)
4499 return DAG.getConstant(0, DL, VT);
4500
4501 // Skip known-zero high parts of the operand.
4502 int64_t OrigBitSize = VT.getSizeInBits();
4503 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
4504 BitSize = std::min(BitSize, OrigBitSize);
4505
4506 // The POPCNT instruction counts the number of bits in each byte.
4507 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
4508 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
4509 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4510
4511 // Add up per-byte counts in a binary tree. All bits of Op at
4512 // position larger than BitSize remain zero throughout.
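 // For a 32-bit operand, for example, the loop runs with I = 16 and I = 8,
 // first adding byte sums that are 16 bits apart and then 8 bits apart, so
 // the full count accumulates in the top byte and is extracted by the SRL
 // below.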
4513 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
4514 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
4515 if (BitSize != OrigBitSize)
4516 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
4517 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
4518 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4519 }
4520
4521 // Extract overall result from high byte.
4522 if (BitSize > 8)
4523 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4524 DAG.getConstant(BitSize - 8, DL, VT));
4525
4526 return Op;
4527}
4528
4529SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4530 SelectionDAG &DAG) const {
4531 SDLoc DL(Op);
4532 AtomicOrdering FenceOrdering =
4533 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4534 SyncScope::ID FenceSSID =
4535 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4536
4537 // The only fence that needs an instruction is a sequentially-consistent
4538 // cross-thread fence.
4539 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4540 FenceSSID == SyncScope::System) {
4541 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
4542 Op.getOperand(0)),
4543 0);
4544 }
4545
4546 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4547 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4548}
4549
4550SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
4551 SelectionDAG &DAG) const {
4552 auto *Node = cast<AtomicSDNode>(Op.getNode());
4553 assert(
4554 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
4555 "Only custom lowering i128 or f128.");
4556 // Use same code to handle both legal and non-legal i128 types.
4557 SmallVector<SDValue, 2> Results;
4558 LowerOperationWrapper(Node, Results, DAG);
4559 return DAG.getMergeValues(Results, SDLoc(Op));
4560}
4561
4562// Prepare for a Compare And Swap for a subword operation. This needs to be
4563// done in memory with 4 bytes at natural alignment.
4564static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
4565 SDValue &AlignedAddr, SDValue &BitShift,
4566 SDValue &NegBitShift) {
4567 EVT PtrVT = Addr.getValueType();
4568 EVT WideVT = MVT::i32;
4569
4570 // Get the address of the containing word.
4571 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4572 DAG.getConstant(-4, DL, PtrVT));
4573
4574 // Get the number of bits that the word must be rotated left in order
4575 // to bring the field to the top bits of a GR32.
4576 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4577 DAG.getConstant(3, DL, PtrVT));
4578 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4579
4580 // Get the complementing shift amount, for rotating a field in the top
4581 // bits back to its proper position.
4582 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4583 DAG.getConstant(0, DL, WideVT), BitShift);
4584
4585}
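// For a halfword at an address A with A % 4 == 2, for example, AlignedAddr
// is A - 2, BitShift is 16 (rotating the field into the top bits of the
// GR32) and NegBitShift is -16 for rotating it back into place.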
4586
4587// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
4588// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
4589SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
4590 SelectionDAG &DAG,
4591 unsigned Opcode) const {
4592 auto *Node = cast<AtomicSDNode>(Op.getNode());
4593
4594 // 32-bit operations need no special handling.
4595 EVT NarrowVT = Node->getMemoryVT();
4596 EVT WideVT = MVT::i32;
4597 if (NarrowVT == WideVT)
4598 return Op;
4599
4600 int64_t BitSize = NarrowVT.getSizeInBits();
4601 SDValue ChainIn = Node->getChain();
4602 SDValue Addr = Node->getBasePtr();
4603 SDValue Src2 = Node->getVal();
4604 MachineMemOperand *MMO = Node->getMemOperand();
4605 SDLoc DL(Node);
4606
4607 // Convert atomic subtracts of constants into additions.
4608 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4609 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4610 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
4611 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
4612 }
4613
4614 SDValue AlignedAddr, BitShift, NegBitShift;
4615 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4616
4617 // Extend the source operand to 32 bits and prepare it for the inner loop.
4618 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
4619 // operations require the source to be shifted in advance. (This shift
4620 // can be folded if the source is constant.) For AND and NAND, the lower
4621 // bits must be set, while for other opcodes they should be left clear.
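 // For an 8-bit AND, for instance, Src2 becomes (Src2 << 24) | 0x00ffffff,
 // so the three bytes of the containing word that are not part of the
 // operation are left unchanged by the inner compare-and-swap loop.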
4622 if (Opcode != SystemZISD::ATOMIC_SWAPW)
4623 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
4624 DAG.getConstant(32 - BitSize, DL, WideVT));
4625 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
4626 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
4627 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
4628 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
4629
4630 // Construct the ATOMIC_LOADW_* node.
4631 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
4632 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4633 DAG.getConstant(BitSize, DL, WideVT) };
4634 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4635 NarrowVT, MMO);
4636
4637 // Rotate the result of the final CS so that the field is in the lower
4638 // bits of a GR32, then truncate it.
4639 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
4640 DAG.getConstant(BitSize, DL, WideVT));
4641 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
4642
4643 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
4644 return DAG.getMergeValues(RetOps, DL);
4645}
4646
4647// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
4648// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
4649SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
4650 SelectionDAG &DAG) const {
4651 auto *Node = cast<AtomicSDNode>(Op.getNode());
4652 EVT MemVT = Node->getMemoryVT();
4653 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
4654 // A full-width operation: negate and use LAA(G).
4655 assert(Op.getValueType() == MemVT && "Mismatched VTs");
4656 assert(Subtarget.hasInterlockedAccess1() &&
4657 "Should have been expanded by AtomicExpand pass.");
4658 SDValue Src2 = Node->getVal();
4659 SDLoc DL(Src2);
4660 SDValue NegSrc2 =
4661 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
4662 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
4663 Node->getChain(), Node->getBasePtr(), NegSrc2,
4664 Node->getMemOperand());
4665 }
4666
4667 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
4668}
4669
4670// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
4671SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
4672 SelectionDAG &DAG) const {
4673 auto *Node = cast<AtomicSDNode>(Op.getNode());
4674 SDValue ChainIn = Node->getOperand(0);
4675 SDValue Addr = Node->getOperand(1);
4676 SDValue CmpVal = Node->getOperand(2);
4677 SDValue SwapVal = Node->getOperand(3);
4678 MachineMemOperand *MMO = Node->getMemOperand();
4679 SDLoc DL(Node);
4680
4681 if (Node->getMemoryVT() == MVT::i128) {
4682 // Use same code to handle both legal and non-legal i128 types.
4683 SmallVector<SDValue, 3> Results;
4684 LowerOperationWrapper(Node, Results, DAG);
4685 return DAG.getMergeValues(Results, DL);
4686 }
4687
4688 // We have native support for 32-bit and 64-bit compare and swap, but we
4689 // still need to expand extracting the "success" result from the CC.
4690 EVT NarrowVT = Node->getMemoryVT();
4691 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4692 if (NarrowVT == WideVT) {
4693 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4694 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
4695 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
4696 DL, Tys, Ops, NarrowVT, MMO);
4697 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4698 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
4699
4700 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4701 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4702 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4703 return SDValue();
4704 }
4705
4706 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4707 // via a fullword ATOMIC_CMP_SWAPW operation.
4708 int64_t BitSize = NarrowVT.getSizeInBits();
4709
4710 SDValue AlignedAddr, BitShift, NegBitShift;
4711 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4712
4713 // Construct the ATOMIC_CMP_SWAPW node.
4714 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4715 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
4716 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
4717 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
4718 VTList, Ops, NarrowVT, MMO);
4719 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4720 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
4721
4722 // emitAtomicCmpSwapW() will zero extend the result (original value).
4723 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
4724 DAG.getValueType(NarrowVT));
4725 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
4726 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4727 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4728 return SDValue();
4729}
4730
4731MachineMemOperand::Flags
4732SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
4733 // Because of how we convert atomic_load and atomic_store to normal loads and
4734 // stores in the DAG, we need to ensure that the MMOs are marked volatile
4735 // since DAGCombine hasn't been updated to account for atomic, but non
4736 // volatile loads. (See D57601)
4737 if (auto *SI = dyn_cast<StoreInst>(&I))
4738 if (SI->isAtomic())
4739 return MachineMemOperand::MOVolatile;
4740 if (auto *LI = dyn_cast<LoadInst>(&I))
4741 if (LI->isAtomic())
4742 return MachineMemOperand::MOVolatile;
4743 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
4744 if (AI->isAtomic())
4745 return MachineMemOperand::MOVolatile;
4746 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
4747 if (AI->isAtomic())
4748 return MachineMemOperand::MOVolatile;
4749 return MachineMemOperand::MONone;
4750}
4751
4752SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
4753 SelectionDAG &DAG) const {
4754 MachineFunction &MF = DAG.getMachineFunction();
4755 auto *Regs = Subtarget.getSpecialRegisters();
4756 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4757 report_fatal_error("Variable-sized stack allocations are not supported "
4758 "in GHC calling convention");
4759 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
4760 Regs->getStackPointerRegister(), Op.getValueType());
4761}
4762
4763SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
4764 SelectionDAG &DAG) const {
4765 MachineFunction &MF = DAG.getMachineFunction();
4766 auto *Regs = Subtarget.getSpecialRegisters();
4767 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4768
4769 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4770 report_fatal_error("Variable-sized stack allocations are not supported "
4771 "in GHC calling convention");
4772
4773 SDValue Chain = Op.getOperand(0);
4774 SDValue NewSP = Op.getOperand(1);
4775 SDValue Backchain;
4776 SDLoc DL(Op);
4777
4778 if (StoreBackchain) {
4779 SDValue OldSP = DAG.getCopyFromReg(
4780 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
4781 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4782 MachinePointerInfo());
4783 }
4784
4785 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
4786
4787 if (StoreBackchain)
4788 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4789 MachinePointerInfo());
4790
4791 return Chain;
4792}
4793
4794SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
4795 SelectionDAG &DAG) const {
4796 bool IsData = Op.getConstantOperandVal(4);
4797 if (!IsData)
4798 // Just preserve the chain.
4799 return Op.getOperand(0);
4800
4801 SDLoc DL(Op);
4802 bool IsWrite = Op.getConstantOperandVal(2);
4803 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
4804 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
4805 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
4806 Op.getOperand(1)};
4807 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
4808 Node->getVTList(), Ops,
4809 Node->getMemoryVT(), Node->getMemOperand());
4810}
4811
4812// Convert condition code in CCReg to an i32 value.
4813static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
4814 SDLoc DL(CCReg);
4815 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
4816 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
4817 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
4818}
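// IPM places the condition code in bits 29:28 of its result, so the shift by
// SystemZ::IPM_CC (28) leaves the CC as a plain value in the range 0-3.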
4819
4820SDValue
4821SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4822 SelectionDAG &DAG) const {
4823 unsigned Opcode, CCValid;
4824 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
4825 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
4826 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
4827 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
4828 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
4829 return SDValue();
4830 }
4831
4832 return SDValue();
4833}
4834
4835SDValue
4836SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4837 SelectionDAG &DAG) const {
4838 unsigned Opcode, CCValid;
4839 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
4840 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
4841 if (Op->getNumValues() == 1)
4842 return getCCResult(DAG, SDValue(Node, 0));
4843 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
4844 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
4845 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
4846 }
4847
4848 unsigned Id = Op.getConstantOperandVal(0);
4849 switch (Id) {
4850 case Intrinsic::thread_pointer:
4851 return lowerThreadPointer(SDLoc(Op), DAG);
4852
4853 case Intrinsic::s390_vpdi:
4854 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
4855 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4856
4857 case Intrinsic::s390_vperm:
4858 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
4859 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4860
4861 case Intrinsic::s390_vuphb:
4862 case Intrinsic::s390_vuphh:
4863 case Intrinsic::s390_vuphf:
4864 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
4865 Op.getOperand(1));
4866
4867 case Intrinsic::s390_vuplhb:
4868 case Intrinsic::s390_vuplhh:
4869 case Intrinsic::s390_vuplhf:
4870 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
4871 Op.getOperand(1));
4872
4873 case Intrinsic::s390_vuplb:
4874 case Intrinsic::s390_vuplhw:
4875 case Intrinsic::s390_vuplf:
4876 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
4877 Op.getOperand(1));
4878
4879 case Intrinsic::s390_vupllb:
4880 case Intrinsic::s390_vupllh:
4881 case Intrinsic::s390_vupllf:
4882 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
4883 Op.getOperand(1));
4884
4885 case Intrinsic::s390_vsumb:
4886 case Intrinsic::s390_vsumh:
4887 case Intrinsic::s390_vsumgh:
4888 case Intrinsic::s390_vsumgf:
4889 case Intrinsic::s390_vsumqf:
4890 case Intrinsic::s390_vsumqg:
4891 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
4892 Op.getOperand(1), Op.getOperand(2));
4893
4894 case Intrinsic::s390_vaq:
4895 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
4896 Op.getOperand(1), Op.getOperand(2));
4897 case Intrinsic::s390_vaccb:
4898 case Intrinsic::s390_vacch:
4899 case Intrinsic::s390_vaccf:
4900 case Intrinsic::s390_vaccg:
4901 case Intrinsic::s390_vaccq:
4902 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
4903 Op.getOperand(1), Op.getOperand(2));
4904 case Intrinsic::s390_vacq:
4905 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
4906 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4907 case Intrinsic::s390_vacccq:
4908 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
4909 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4910
4911 case Intrinsic::s390_vsq:
4912 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
4913 Op.getOperand(1), Op.getOperand(2));
4914 case Intrinsic::s390_vscbib:
4915 case Intrinsic::s390_vscbih:
4916 case Intrinsic::s390_vscbif:
4917 case Intrinsic::s390_vscbig:
4918 case Intrinsic::s390_vscbiq:
4919 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
4920 Op.getOperand(1), Op.getOperand(2));
4921 case Intrinsic::s390_vsbiq:
4922 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
4923 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4924 case Intrinsic::s390_vsbcbiq:
4925 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
4926 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4927 }
4928
4929 return SDValue();
4930}
4931
4932namespace {
4933// Says that SystemZISD operation Opcode can be used to perform the equivalent
4934// of a VPERM with permute vector Bytes. If Opcode takes three operands,
4935// Operand is the constant third operand, otherwise it is the number of
4936// bytes in each element of the result.
4937struct Permute {
4938 unsigned Opcode;
4939 unsigned Operand;
4940 unsigned char Bytes[SystemZ::VectorBytes];
4941};
4942}
4943
4944static const Permute PermuteForms[] = {
4945 // VMRHG
4946 { SystemZISD::MERGE_HIGH, 8,
4947 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4948 // VMRHF
4949 { SystemZISD::MERGE_HIGH, 4,
4950 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4951 // VMRHH
4952 { SystemZISD::MERGE_HIGH, 2,
4953 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4954 // VMRHB
4955 { SystemZISD::MERGE_HIGH, 1,
4956 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4957 // VMRLG
4958 { SystemZISD::MERGE_LOW, 8,
4959 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4960 // VMRLF
4961 { SystemZISD::MERGE_LOW, 4,
4962 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4963 // VMRLH
4964 { SystemZISD::MERGE_LOW, 2,
4965 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4966 // VMRLB
4967 { SystemZISD::MERGE_LOW, 1,
4968 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4969 // VPKG
4970 { SystemZISD::PACK, 4,
4971 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4972 // VPKF
4973 { SystemZISD::PACK, 2,
4974 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4975 // VPKH
4976 { SystemZISD::PACK, 1,
4977 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4978 // VPDI V1, V2, 4 (low half of V1, high half of V2)
4979 { SystemZISD::PERMUTE_DWORDS, 4,
4980 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4981 // VPDI V1, V2, 1 (high half of V1, low half of V2)
4982 { SystemZISD::PERMUTE_DWORDS, 1,
4983 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4984};
4985
4986// Called after matching a vector shuffle against a particular pattern.
4987// Both the original shuffle and the pattern have two vector operands.
4988// OpNos[0] is the operand of the original shuffle that should be used for
4989// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4990// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
4991// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4992// for operands 0 and 1 of the pattern.
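// For example, if the caller pinned only OpNos[1], both pattern operands
// resolve to that same shuffle operand; if neither operand was pinned there
// is nothing to match and the function returns false.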
4993static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
4994 if (OpNos[0] < 0) {
4995 if (OpNos[1] < 0)
4996 return false;
4997 OpNo0 = OpNo1 = OpNos[1];
4998 } else if (OpNos[1] < 0) {
4999 OpNo0 = OpNo1 = OpNos[0];
5000 } else {
5001 OpNo0 = OpNos[0];
5002 OpNo1 = OpNos[1];
5003 }
5004 return true;
5005}
5006
5007// Bytes is a VPERM-like permute vector, except that -1 is used for
5008// undefined bytes. Return true if the VPERM can be implemented using P.
5009// When returning true set OpNo0 to the VPERM operand that should be
5010// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5011//
5012// For example, if swapping the VPERM operands allows P to match, OpNo0
5013// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5014// operand, but rewriting it to use two duplicated operands allows it to
5015// match P, then OpNo0 and OpNo1 will be the same.
5016static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5017 unsigned &OpNo0, unsigned &OpNo1) {
5018 int OpNos[] = { -1, -1 };
5019 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5020 int Elt = Bytes[I];
5021 if (Elt >= 0) {
5022 // Make sure that the two permute vectors use the same suboperand
5023 // byte number. Only the operand numbers (the high bits) are
5024 // allowed to differ.
5025 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5026 return false;
5027 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5028 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5029 // Make sure that the operand mappings are consistent with previous
5030 // elements.
5031 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5032 return false;
5033 OpNos[ModelOpNo] = RealOpNo;
5034 }
5035 }
5036 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5037}
5038
5039// As above, but search for a matching permute.
5040static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5041 unsigned &OpNo0, unsigned &OpNo1) {
5042 for (auto &P : PermuteForms)
5043 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5044 return &P;
5045 return nullptr;
5046}
5047
5048// Bytes is a VPERM-like permute vector, except that -1 is used for
5049// undefined bytes. This permute is an operand of an outer permute.
5050// See whether redistributing the -1 bytes gives a shuffle that can be
5051// implemented using P. If so, set Transform to a VPERM-like permute vector
5052// that, when applied to the result of P, gives the original permute in Bytes.
5053static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5054 const Permute &P,
5055 SmallVectorImpl<int> &Transform) {
5056 unsigned To = 0;
5057 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5058 int Elt = Bytes[From];
5059 if (Elt < 0)
5060 // Byte number From of the result is undefined.
5061 Transform[From] = -1;
5062 else {
5063 while (P.Bytes[To] != Elt) {
5064 To += 1;
5065 if (To == SystemZ::VectorBytes)
5066 return false;
5067 }
5068 Transform[From] = To;
5069 }
5070 }
5071 return true;
5072}
5073
5074// As above, but search for a matching permute.
5075static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5076 SmallVectorImpl<int> &Transform) {
5077 for (auto &P : PermuteForms)
5078 if (matchDoublePermute(Bytes, P, Transform))
5079 return &P;
5080 return nullptr;
5081}
5082
5083// Convert the mask of the given shuffle op into a byte-level mask,
5084// as if it had type vNi8.
5085static bool getVPermMask(SDValue ShuffleOp,
5086 SmallVectorImpl<int> &Bytes) {
5087 EVT VT = ShuffleOp.getValueType();
5088 unsigned NumElements = VT.getVectorNumElements();
5089 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5090
5091 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5092 Bytes.resize(NumElements * BytesPerElement, -1);
5093 for (unsigned I = 0; I < NumElements; ++I) {
5094 int Index = VSN->getMaskElt(I);
5095 if (Index >= 0)
5096 for (unsigned J = 0; J < BytesPerElement; ++J)
5097 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5098 }
5099 return true;
5100 }
5101 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5102 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5103 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5104 Bytes.resize(NumElements * BytesPerElement, -1);
5105 for (unsigned I = 0; I < NumElements; ++I)
5106 for (unsigned J = 0; J < BytesPerElement; ++J)
5107 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5108 return true;
5109 }
5110 return false;
5111}
5112
5113// Bytes is a VPERM-like permute vector, except that -1 is used for
5114// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5115// the result come from a contiguous sequence of bytes from one input.
5116// Set Base to the selector for the first byte if so.
5117static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5118 unsigned BytesPerElement, int &Base) {
5119 Base = -1;
5120 for (unsigned I = 0; I < BytesPerElement; ++I) {
5121 if (Bytes[Start + I] >= 0) {
5122 unsigned Elem = Bytes[Start + I];
5123 if (Base < 0) {
5124 Base = Elem - I;
5125 // Make sure the bytes would come from one input operand.
5126 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5127 return false;
5128 } else if (unsigned(Base) != Elem - I)
5129 return false;
5130 }
5131 }
5132 return true;
5133}
5134
5135// Bytes is a VPERM-like permute vector, except that -1 is used for
5136// undefined bytes. Return true if it can be performed using VSLDB.
5137// When returning true, set StartIndex to the shift amount and OpNo0
5138// and OpNo1 to the VPERM operands that should be used as the first
5139// and second shift operand respectively.
5140static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5141 unsigned &StartIndex, unsigned &OpNo0,
5142 unsigned &OpNo1) {
5143 int OpNos[] = { -1, -1 };
5144 int Shift = -1;
5145 for (unsigned I = 0; I < 16; ++I) {
5146 int Index = Bytes[I];
5147 if (Index >= 0) {
5148 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5149 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5150 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5151 if (Shift < 0)
5152 Shift = ExpectedShift;
5153 else if (Shift != ExpectedShift)
5154 return false;
5155 // Make sure that the operand mappings are consistent with previous
5156 // elements.
5157 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5158 return false;
5159 OpNos[ModelOpNo] = RealOpNo;
5160 }
5161 }
5162 StartIndex = Shift;
5163 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5164}
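// For example, the byte mask { 4, 5, ..., 18, 19 } (i.e. Bytes[I] == I + 4)
// matches with StartIndex = 4, OpNo0 = 0 and OpNo1 = 1: the result is bytes
// 4..19 of the 32-byte concatenation of the two operands, which is exactly
// what VSLDB computes. Undefined (-1) bytes leave the corresponding operand
// mapping unconstrained.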
5165
5166// Create a node that performs P on operands Op0 and Op1, casting the
5167// operands to the appropriate type. The type of the result is determined by P.
5168static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5169 const Permute &P, SDValue Op0, SDValue Op1) {
5170 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5171 // elements of a PACK are twice as wide as the outputs.
5172 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5173 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5174 P.Operand);
5175 // Cast both operands to the appropriate type.
5176 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5177 SystemZ::VectorBytes / InBytes);
5178 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5179 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5180 SDValue Op;
5181 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5182 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5183 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5184 } else if (P.Opcode == SystemZISD::PACK) {
5185 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5186 SystemZ::VectorBytes / P.Operand);
5187 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5188 } else {
5189 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5190 }
5191 return Op;
5192}
5193
5194static bool isZeroVector(SDValue N) {
5195 if (N->getOpcode() == ISD::BITCAST)
5196 N = N->getOperand(0);
5197 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5198 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5199 return Op->getZExtValue() == 0;
5200 return ISD::isBuildVectorAllZeros(N.getNode());
5201}
5202
5203// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5204static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5205 for (unsigned I = 0; I < Num ; I++)
5206 if (isZeroVector(Ops[I]))
5207 return I;
5208 return UINT32_MAX;
5209}
5210
5211// Bytes is a VPERM-like permute vector, except that -1 is used for
5212// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5213// VSLDB or VPERM.
5214static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5215 SDValue *Ops,
5216 const SmallVectorImpl<int> &Bytes) {
5217 for (unsigned I = 0; I < 2; ++I)
5218 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5219
5220 // First see whether VSLDB can be used.
5221 unsigned StartIndex, OpNo0, OpNo1;
5222 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5223 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5224 Ops[OpNo1],
5225 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5226
5227 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5228 // eliminate a zero vector by reusing any zero index in the permute vector.
5229 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5230 if (ZeroVecIdx != UINT32_MAX) {
5231 bool MaskFirst = true;
5232 int ZeroIdx = -1;
5233 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5234 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5235 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5236 if (OpNo == ZeroVecIdx && I == 0) {
5237 // If the first byte is zero, use mask as first operand.
5238 ZeroIdx = 0;
5239 break;
5240 }
5241 if (OpNo != ZeroVecIdx && Byte == 0) {
5242 // If mask contains a zero, use it by placing that vector first.
5243 ZeroIdx = I + SystemZ::VectorBytes;
5244 MaskFirst = false;
5245 break;
5246 }
5247 }
5248 if (ZeroIdx != -1) {
5249 SDValue IndexNodes[SystemZ::VectorBytes];
5250 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5251 if (Bytes[I] >= 0) {
5252 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5253 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5254 if (OpNo == ZeroVecIdx)
5255 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5256 else {
5257 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5258 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5259 }
5260 } else
5261 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5262 }
5263 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5264 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5265 if (MaskFirst)
5266 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5267 Mask);
5268 else
5269 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5270 Mask);
5271 }
5272 }
5273
5274 SDValue IndexNodes[SystemZ::VectorBytes];
5275 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5276 if (Bytes[I] >= 0)
5277 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5278 else
5279 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5280 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5281 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5282 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5283}
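// Note on the zero-vector case above: VPERM is given the permute vector
// itself as one of its two register operands, and every result byte that
// should be zero is pointed at a mask byte whose value is known to be 0.
// This removes the need for a separate zero-vector register operand.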
5284
5285namespace {
5286// Describes a general N-operand vector shuffle.
5287struct GeneralShuffle {
5288 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
5289 void addUndef();
5290 bool add(SDValue, unsigned);
5291 SDValue getNode(SelectionDAG &, const SDLoc &);
5292 void tryPrepareForUnpack();
5293 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5294 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5295
5296 // The operands of the shuffle.
5297 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5298
5299 // Index I is -1 if byte I of the result is undefined. Otherwise the
5300 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5301 // Bytes[I] / SystemZ::VectorBytes.
5302 SmallVector<int, SystemZ::VectorBytes> Bytes;
5303
5304 // The type of the shuffle result.
5305 EVT VT;
5306
5307 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5308 unsigned UnpackFromEltSize;
5309};
5310}
5311
5312// Add an extra undefined element to the shuffle.
5313void GeneralShuffle::addUndef() {
5314 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5315 for (unsigned I = 0; I < BytesPerElement; ++I)
5316 Bytes.push_back(-1);
5317}
5318
5319// Add an extra element to the shuffle, taking it from element Elem of Op.
5320// A null Op indicates a vector input whose value will be calculated later;
5321// there is at most one such input per shuffle and it always has the same
5322 // type as the result. Returns false if the source vector elements of an
5323 // EXTRACT_VECTOR_ELT are smaller than the destination elements; per LLVM
5324 // semantics they would be implicitly extended, but this is rare and not optimized.
5325bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5326 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5327
5328 // The source vector can have wider elements than the result,
5329 // either through an explicit TRUNCATE or because of type legalization.
5330 // We want the least significant part.
5331 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5332 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5333
5334 // Return false if the source elements are smaller than their destination
5335 // elements.
5336 if (FromBytesPerElement < BytesPerElement)
5337 return false;
5338
5339 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5340 (FromBytesPerElement - BytesPerElement));
5341
5342 // Look through things like shuffles and bitcasts.
5343 while (Op.getNode()) {
5344 if (Op.getOpcode() == ISD::BITCAST)
5345 Op = Op.getOperand(0);
5346 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5347 // See whether the bytes we need come from a contiguous part of one
5348 // operand.
5349 SmallVector<int, SystemZ::VectorBytes> OpBytes;
5350 if (!getVPermMask(Op, OpBytes))
5351 break;
5352 int NewByte;
5353 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5354 break;
5355 if (NewByte < 0) {
5356 addUndef();
5357 return true;
5358 }
5359 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
5360 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
5361 } else if (Op.isUndef()) {
5362 addUndef();
5363 return true;
5364 } else
5365 break;
5366 }
5367
5368 // Make sure that the source of the extraction is in Ops.
5369 unsigned OpNo = 0;
5370 for (; OpNo < Ops.size(); ++OpNo)
5371 if (Ops[OpNo] == Op)
5372 break;
5373 if (OpNo == Ops.size())
5374 Ops.push_back(Op);
5375
5376 // Add the element to Bytes.
5377 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
5378 for (unsigned I = 0; I < BytesPerElement; ++I)
5379 Bytes.push_back(Base + I);
5380
5381 return true;
5382}
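// For example, when the shuffle result has 4-byte elements but the source of
// an extraction is a v2i64, adding element 0 of that source selects bytes
// 4..7 (Byte = 0 + (8 - 4) = 4): on this big-endian target these are the
// least significant four bytes of the wide element, matching the implicit
// truncation described above.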
5383
5384// Return SDNodes for the completed shuffle.
5385SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
5386 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
5387
5388 if (Ops.size() == 0)
5389 return DAG.getUNDEF(VT);
5390
5391 // Use a single unpack if possible as the last operation.
5392 tryPrepareForUnpack();
5393
5394 // Make sure that there are at least two shuffle operands.
5395 if (Ops.size() == 1)
5396 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
5397
5398 // Create a tree of shuffles, deferring root node until after the loop.
5399 // Try to redistribute the undefined elements of non-root nodes so that
5400 // the non-root shuffles match something like a pack or merge, then adjust
5401 // the parent node's permute vector to compensate for the new order.
5402 // Among other things, this copes with vectors like <2 x i16> that were
5403 // padded with undefined elements during type legalization.
5404 //
5405 // In the best case this redistribution will lead to the whole tree
5406 // using packs and merges. It should rarely be a loss in other cases.
5407 unsigned Stride = 1;
5408 for (; Stride * 2 < Ops.size(); Stride *= 2) {
5409 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
5410 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
5411
5412 // Create a mask for just these two operands.
5413 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
5414 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5415 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
5416 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
5417 if (OpNo == I)
5418 NewBytes[J] = Byte;
5419 else if (OpNo == I + Stride)
5420 NewBytes[J] = SystemZ::VectorBytes + Byte;
5421 else
5422 NewBytes[J] = -1;
5423 }
5424 // See if it would be better to reorganize NewBytes to avoid using VPERM.
5425 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
5426 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
5427 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
5428 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
5429 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5430 if (NewBytes[J] >= 0) {
5431 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
5432 "Invalid double permute");
5433 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
5434 } else
5435 assert(NewBytesMap[J] < 0 && "Invalid double permute");
5436 }
5437 } else {
5438 // Just use NewBytes on the operands.
5439 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
5440 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
5441 if (NewBytes[J] >= 0)
5442 Bytes[J] = I * SystemZ::VectorBytes + J;
5443 }
5444 }
5445 }
5446
5447 // Now we just have 2 inputs. Put the second operand in Ops[1].
5448 if (Stride > 1) {
5449 Ops[1] = Ops[Stride];
5450 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5451 if (Bytes[I] >= int(SystemZ::VectorBytes))
5452 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
5453 }
5454
5455 // Look for an instruction that can do the permute without resorting
5456 // to VPERM.
5457 unsigned OpNo0, OpNo1;
5458 SDValue Op;
5459 if (unpackWasPrepared() && Ops[1].isUndef())
5460 Op = Ops[0];
5461 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
5462 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
5463 else
5464 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
5465
5466 Op = insertUnpackIfPrepared(DAG, DL, Op);
5467
5468 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5469}
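// For example, with four shuffle operands the loop above first combines
// Ops[0] with Ops[1] and Ops[2] with Ops[3] (Stride == 1), rewriting Bytes
// to refer to the two intermediate results; the code after the loop then
// moves the second intermediate into Ops[1] and emits the final permute,
// so the whole shuffle is done with three permute nodes.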
5470
5471#ifndef NDEBUG
5472static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
5473 dbgs() << Msg.c_str() << " { ";
5474 for (unsigned i = 0; i < Bytes.size(); i++)
5475 dbgs() << Bytes[i] << " ";
5476 dbgs() << "}\n";
5477}
5478#endif
5479
5480// If the Bytes vector matches an unpack operation, prepare to do the unpack
5481// after all else by removing the zero vector and the effect of the unpack on
5482// Bytes.
5483void GeneralShuffle::tryPrepareForUnpack() {
5484 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
5485 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
5486 return;
5487
5488 // Only do this if removing the zero vector reduces the depth, otherwise
5489 // the critical path will increase with the final unpack.
5490 if (Ops.size() > 2 &&
5491 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
5492 return;
5493
5494 // Find an unpack that would allow removing the zero vector from Ops.
5495 UnpackFromEltSize = 1;
5496 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
5497 bool MatchUnpack = true;
5498 SmallVector<int, SystemZ::VectorBytes> SrcBytes;
5499 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
5500 unsigned ToEltSize = UnpackFromEltSize * 2;
5501 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
5502 if (!IsZextByte)
5503 SrcBytes.push_back(Bytes[Elt]);
5504 if (Bytes[Elt] != -1) {
5505 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
5506 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
5507 MatchUnpack = false;
5508 break;
5509 }
5510 }
5511 }
5512 if (MatchUnpack) {
5513 if (Ops.size() == 2) {
5514 // Don't use unpack if a single source operand needs rearrangement.
5515 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
5516 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
5517 UnpackFromEltSize = UINT_MAX;
5518 return;
5519 }
5520 }
5521 break;
5522 }
5523 }
5524 if (UnpackFromEltSize > 4)
5525 return;
5526
5527 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
5528 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
5529 << ".\n";
5530 dumpBytes(Bytes, "Original Bytes vector:"););
5531
5532 // Apply the unpack in reverse to the Bytes array.
5533 unsigned B = 0;
5534 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
5535 Elt += UnpackFromEltSize;
5536 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
5537 Bytes[B] = Bytes[Elt];
5538 }
5539 while (B < SystemZ::VectorBytes)
5540 Bytes[B++] = -1;
5541
5542 // Remove the zero vector from Ops
5543 Ops.erase(&Ops[ZeroVecOpNo]);
5544 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5545 if (Bytes[I] >= 0) {
5546 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5547 if (OpNo > ZeroVecOpNo)
5548 Bytes[I] -= SystemZ::VectorBytes;
5549 }
5550
5551 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
5552 dbgs() << "\n";);
5553}
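// For example, if Ops[1] is a zero vector and Bytes is
// { 16, 0, 16, 1, ..., 16, 7 } (a zero byte followed by each data byte),
// the loop matches UnpackFromEltSize == 1: every even result byte must come
// from the zero vector and every odd one from Ops[0]. Bytes is then
// rewritten to { 0, 1, ..., 7, -1, ..., -1 }, the zero vector is erased from
// Ops, and insertUnpackIfPrepared() later re-creates the interleaved zeroes
// with a single UNPACKL_HIGH.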
5554
5555SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
5556 const SDLoc &DL,
5557 SDValue Op) {
5558 if (!unpackWasPrepared())
5559 return Op;
5560 unsigned InBits = UnpackFromEltSize * 8;
5561 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
5562 SystemZ::VectorBits / InBits);
5563 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
5564 unsigned OutBits = InBits * 2;
5565 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
5566 SystemZ::VectorBits / OutBits);
5567 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
5568}
5569
5570// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
5571static bool isScalarToVector(SDValue Op) {
5572 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
5573 if (!Op.getOperand(I).isUndef())
5574 return false;
5575 return true;
5576}
5577
5578// Return a vector of type VT that contains Value in the first element.
5579// The other elements don't matter.
5580static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5581 SDValue Value) {
5582 // If we have a constant, replicate it to all elements and let the
5583 // BUILD_VECTOR lowering take care of it.
5584 if (Value.getOpcode() == ISD::Constant ||
5585 Value.getOpcode() == ISD::ConstantFP) {
5586 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
5587 return DAG.getBuildVector(VT, DL, Ops);
5588 }
5589 if (Value.isUndef())
5590 return DAG.getUNDEF(VT);
5591 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
5592}
5593
5594// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
5595// element 1. Used for cases in which replication is cheap.
5596static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5597 SDValue Op0, SDValue Op1) {
5598 if (Op0.isUndef()) {
5599 if (Op1.isUndef())
5600 return DAG.getUNDEF(VT);
5601 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
5602 }
5603 if (Op1.isUndef())
5604 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
5605 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
5606 buildScalarToVector(DAG, DL, VT, Op0),
5607 buildScalarToVector(DAG, DL, VT, Op1));
5608}
5609
5610// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
5611// vector for them.
5612static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
5613 SDValue Op1) {
5614 if (Op0.isUndef() && Op1.isUndef())
5615 return DAG.getUNDEF(MVT::v2i64);
5616 // If one of the two inputs is undefined then replicate the other one,
5617 // in order to avoid using another register unnecessarily.
5618 if (Op0.isUndef())
5619 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5620 else if (Op1.isUndef())
5621 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5622 else {
5623 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5624 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5625 }
5626 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
5627}
5628
5629// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
5630// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
5631// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
5632// would benefit from this representation and return it if so.
5633static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
5634 BuildVectorSDNode *BVN) {
5635 EVT VT = BVN->getValueType(0);
5636 unsigned NumElements = VT.getVectorNumElements();
5637
5638 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
5639 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
5640 // need a BUILD_VECTOR, add an additional placeholder operand for that
5641 // BUILD_VECTOR and store its operands in ResidueOps.
5642 GeneralShuffle GS(VT);
5643 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
5644 bool FoundOne = false;
5645 for (unsigned I = 0; I < NumElements; ++I) {
5646 SDValue Op = BVN->getOperand(I);
5647 if (Op.getOpcode() == ISD::TRUNCATE)
5648 Op = Op.getOperand(0);
5649 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5650 Op.getOperand(1).getOpcode() == ISD::Constant) {
5651 unsigned Elem = Op.getConstantOperandVal(1);
5652 if (!GS.add(Op.getOperand(0), Elem))
5653 return SDValue();
5654 FoundOne = true;
5655 } else if (Op.isUndef()) {
5656 GS.addUndef();
5657 } else {
5658 if (!GS.add(SDValue(), ResidueOps.size()))
5659 return SDValue();
5660 ResidueOps.push_back(BVN->getOperand(I));
5661 }
5662 }
5663
5664 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
5665 if (!FoundOne)
5666 return SDValue();
5667
5668 // Create the BUILD_VECTOR for the remaining elements, if any.
5669 if (!ResidueOps.empty()) {
5670 while (ResidueOps.size() < NumElements)
5671 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
5672 for (auto &Op : GS.Ops) {
5673 if (!Op.getNode()) {
5674 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
5675 break;
5676 }
5677 }
5678 }
5679 return GS.getNode(DAG, SDLoc(BVN));
5680}
5681
5682bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
5683 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
5684 return true;
5685 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
5686 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
5687 return true;
5688 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
5689 return true;
5690 return false;
5691}
5692
5693// Combine GPR scalar values Elems into a vector of type VT.
5694SDValue
5695SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5696 SmallVectorImpl<SDValue> &Elems) const {
5697 // See whether there is a single replicated value.
5698 SDValue Single;
5699 unsigned int NumElements = Elems.size();
5700 unsigned int Count = 0;
5701 for (auto Elem : Elems) {
5702 if (!Elem.isUndef()) {
5703 if (!Single.getNode())
5704 Single = Elem;
5705 else if (Elem != Single) {
5706 Single = SDValue();
5707 break;
5708 }
5709 Count += 1;
5710 }
5711 }
5712 // There are three cases here:
5713 //
5714 // - if the only defined element is a loaded one, the best sequence
5715 // is a replicating load.
5716 //
5717 // - otherwise, if the only defined element is an i64 value, we will
5718 // end up with the same VLVGP sequence regardless of whether we short-cut
5719 // for replication or fall through to the later code.
5720 //
5721 // - otherwise, if the only defined element is an i32 or smaller value,
5722 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
5723 // This is only a win if the single defined element is used more than once.
5724 // In other cases we're better off using a single VLVGx.
5725 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
5726 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
5727
5728 // If all elements are loads, use VLREP/VLEs (below).
5729 bool AllLoads = true;
5730 for (auto Elem : Elems)
5731 if (!isVectorElementLoad(Elem)) {
5732 AllLoads = false;
5733 break;
5734 }
5735
5736 // The best way of building a v2i64 from two i64s is to use VLVGP.
5737 if (VT == MVT::v2i64 && !AllLoads)
5738 return joinDwords(DAG, DL, Elems[0], Elems[1]);
5739
5740 // Use a 64-bit merge high to combine two doubles.
5741 if (VT == MVT::v2f64 && !AllLoads)
5742 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5743
5744 // Build v4f32 values directly from the FPRs:
5745 //
5746 //   <Axxx> <Bxxx> <Cxxx> <Dxxx>
5747 //      V       V              VMRHF
5748 //    <ABxx>  <CDxx>
5749 //        V                    VMRHG
5750 //      <ABCD>
5751 if (VT == MVT::v4f32 && !AllLoads) {
5752 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5753 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
5754 // Avoid unnecessary undefs by reusing the other operand.
5755 if (Op01.isUndef())
5756 Op01 = Op23;
5757 else if (Op23.isUndef())
5758 Op23 = Op01;
5759 // Merging identical replications is a no-op.
5760 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
5761 return Op01;
5762 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
5763 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
5764 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
5765 DL, MVT::v2i64, Op01, Op23);
5766 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5767 }
5768
5769 // Collect the constant terms.
5772
5773 unsigned NumConstants = 0;
5774 for (unsigned I = 0; I < NumElements; ++I) {
5775 SDValue Elem = Elems[I];
5776 if (Elem.getOpcode() == ISD::Constant ||
5777 Elem.getOpcode() == ISD::ConstantFP) {
5778 NumConstants += 1;
5779 Constants[I] = Elem;
5780 Done[I] = true;
5781 }
5782 }
5783 // If there was at least one constant, fill in the other elements of
5784 // Constants with undefs to get a full vector constant and use that
5785 // as the starting point.
5786 SDValue Result;
5787 SDValue ReplicatedVal;
5788 if (NumConstants > 0) {
5789 for (unsigned I = 0; I < NumElements; ++I)
5790 if (!Constants[I].getNode())
5791 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
5792 Result = DAG.getBuildVector(VT, DL, Constants);
5793 } else {
5794 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
5795 // avoid a false dependency on any previous contents of the vector
5796 // register.
5797
5798 // Use a VLREP if at least one element is a load. Make sure to replicate
5799 // the load whose value is used by the largest number of elements.
5800 std::map<const SDNode*, unsigned> UseCounts;
5801 SDNode *LoadMaxUses = nullptr;
5802 for (unsigned I = 0; I < NumElements; ++I)
5803 if (isVectorElementLoad(Elems[I])) {
5804 SDNode *Ld = Elems[I].getNode();
5805 UseCounts[Ld]++;
5806 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
5807 LoadMaxUses = Ld;
5808 }
5809 if (LoadMaxUses != nullptr) {
5810 ReplicatedVal = SDValue(LoadMaxUses, 0);
5811 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
5812 } else {
5813 // Try to use VLVGP.
5814 unsigned I1 = NumElements / 2 - 1;
5815 unsigned I2 = NumElements - 1;
5816 bool Def1 = !Elems[I1].isUndef();
5817 bool Def2 = !Elems[I2].isUndef();
5818 if (Def1 || Def2) {
5819 SDValue Elem1 = Elems[Def1 ? I1 : I2];
5820 SDValue Elem2 = Elems[Def2 ? I2 : I1];
5821 Result = DAG.getNode(ISD::BITCAST, DL, VT,
5822 joinDwords(DAG, DL, Elem1, Elem2));
5823 Done[I1] = true;
5824 Done[I2] = true;
5825 } else
5826 Result = DAG.getUNDEF(VT);
5827 }
5828 }
5829
5830 // Use VLVGx to insert the other elements.
5831 for (unsigned I = 0; I < NumElements; ++I)
5832 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
5833 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
5834 DAG.getConstant(I, DL, MVT::i32));
5835 return Result;
5836}
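// A note on the VLVGP shortcut in buildVector(): VLVGP sets the two
// doublewords of the result from two GPRs, so for example in a v4i32 the
// values written end up in elements NumElements/2 - 1 == 1 and
// NumElements - 1 == 3 (the least significant word of each doubleword).
// That is why exactly those two indices are seeded through joinDwords()
// before the remaining elements are inserted with INSERT_VECTOR_ELT (VLVGx).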
5837
5838SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
5839 SelectionDAG &DAG) const {
5840 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
5841 SDLoc DL(Op);
5842 EVT VT = Op.getValueType();
5843
5844 if (BVN->isConstant()) {
5845 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
5846 return Op;
5847
5848 // Fall back to loading it from memory.
5849 return SDValue();
5850 }
5851
5852 // See if we should use shuffles to construct the vector from other vectors.
5853 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
5854 return Res;
5855
5856 // Detect SCALAR_TO_VECTOR conversions.
5857 if (isScalarToVector(Op))
5858 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
5859
5860 // Otherwise use buildVector to build the vector up from GPRs.
5861 unsigned NumElements = Op.getNumOperands();
5862 SmallVector<SDValue, 16> Ops(NumElements);
5863 for (unsigned I = 0; I < NumElements; ++I)
5864 Ops[I] = Op.getOperand(I);
5865 return buildVector(DAG, DL, VT, Ops);
5866}
5867
5868SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
5869 SelectionDAG &DAG) const {
5870 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
5871 SDLoc DL(Op);
5872 EVT VT = Op.getValueType();
5873 unsigned NumElements = VT.getVectorNumElements();
5874
5875 if (VSN->isSplat()) {
5876 SDValue Op0 = Op.getOperand(0);
5877 unsigned Index = VSN->getSplatIndex();
5879 "Splat index should be defined and in first operand");
5880 // See whether the value we're splatting is directly available as a scalar.
5881 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5882 Op0.getOpcode() == ISD::BUILD_VECTOR)
5883 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
5884 // Otherwise keep it as a vector-to-vector operation.
5885 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
5886 DAG.getTargetConstant(Index, DL, MVT::i32));
5887 }
5888
5889 GeneralShuffle GS(VT);
5890 for (unsigned I = 0; I < NumElements; ++I) {
5891 int Elt = VSN->getMaskElt(I);
5892 if (Elt < 0)
5893 GS.addUndef();
5894 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
5895 unsigned(Elt) % NumElements))
5896 return SDValue();
5897 }
5898 return GS.getNode(DAG, SDLoc(VSN));
5899}
5900
5901SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
5902 SelectionDAG &DAG) const {
5903 SDLoc DL(Op);
5904 // Just insert the scalar into element 0 of an undefined vector.
5905 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
5906 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
5907 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
5908}
5909
5910SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
5911 SelectionDAG &DAG) const {
5912 // Handle insertions of floating-point values.
5913 SDLoc DL(Op);
5914 SDValue Op0 = Op.getOperand(0);
5915 SDValue Op1 = Op.getOperand(1);
5916 SDValue Op2 = Op.getOperand(2);
5917 EVT VT = Op.getValueType();
5918
5919 // Insertions into constant indices of a v2f64 can be done using VPDI.
5920 // However, if the inserted value is a bitcast or a constant then it's
5921 // better to use GPRs, as below.
5922 if (VT == MVT::v2f64 &&
5923 Op1.getOpcode() != ISD::BITCAST &&
5924 Op1.getOpcode() != ISD::ConstantFP &&
5925 Op2.getOpcode() == ISD::Constant) {
5926 uint64_t Index = Op2->getAsZExtVal();
5927 unsigned Mask = VT.getVectorNumElements() - 1;
5928 if (Index <= Mask)
5929 return Op;
5930 }
5931
5932 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
5933 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
5934 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
5935 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
5936 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
5937 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
5938 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5939}
5940
5941SDValue
5942SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
5943 SelectionDAG &DAG) const {
5944 // Handle extractions of floating-point values.
5945 SDLoc DL(Op);
5946 SDValue Op0 = Op.getOperand(0);
5947 SDValue Op1 = Op.getOperand(1);
5948 EVT VT = Op.getValueType();
5949 EVT VecVT = Op0.getValueType();
5950
5951 // Extractions of constant indices can be done directly.
5952 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
5953 uint64_t Index = CIndexN->getZExtValue();
5954 unsigned Mask = VecVT.getVectorNumElements() - 1;
5955 if (Index <= Mask)
5956 return Op;
5957 }
5958
5959 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
5960 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
5961 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
5962 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
5963 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
5964 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5965}
5966
5967SDValue SystemZTargetLowering::
5968lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5969 SDValue PackedOp = Op.getOperand(0);
5970 EVT OutVT = Op.getValueType();
5971 EVT InVT = PackedOp.getValueType();
5972 unsigned ToBits = OutVT.getScalarSizeInBits();
5973 unsigned FromBits = InVT.getScalarSizeInBits();
5974 do {
5975 FromBits *= 2;
5976 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
5977 SystemZ::VectorBits / FromBits);
5978 PackedOp =
5979 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
5980 } while (FromBits != ToBits);
5981 return PackedOp;
5982}
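// For example, sign-extending the first four i8 elements of a v16i8 to
// v4i32 takes two steps: UNPACK_HIGH to v8i16 and then UNPACK_HIGH again to
// v4i32, with FromBits doubling on each iteration until it equals ToBits.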
5983
5984// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
5985SDValue SystemZTargetLowering::
5986lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5987 SDValue PackedOp = Op.getOperand(0);
5988 SDLoc DL(Op);
5989 EVT OutVT = Op.getValueType();
5990 EVT InVT = PackedOp.getValueType();
5991 unsigned InNumElts = InVT.getVectorNumElements();
5992 unsigned OutNumElts = OutVT.getVectorNumElements();
5993 unsigned NumInPerOut = InNumElts / OutNumElts;
5994
5995 SDValue ZeroVec =
5996 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
5997
5998 SmallVector<int, 16> Mask(InNumElts);
5999 unsigned ZeroVecElt = InNumElts;
6000 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
6001 unsigned MaskElt = PackedElt * NumInPerOut;
6002 unsigned End = MaskElt + NumInPerOut - 1;
6003 for (; MaskElt < End; MaskElt++)
6004 Mask[MaskElt] = ZeroVecElt++;
6005 Mask[MaskElt] = PackedElt;
6006 }
6007 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6008 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6009}
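// For example, zero-extending v16i8 to v4i32 (NumInPerOut == 4) produces the
// shuffle mask { 16,17,18,0, 19,20,21,1, 22,23,24,2, 25,26,27,3 }: each
// result element takes three bytes from the zero vector followed by the one
// source byte that survives, and the resulting VECTOR_SHUFFLE is then
// lowered by the shuffle machinery above.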
6010
6011SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6012 unsigned ByScalar) const {
6013 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6014 SDValue Op0 = Op.getOperand(0);
6015 SDValue Op1 = Op.getOperand(1);
6016 SDLoc DL(Op);
6017 EVT VT = Op.getValueType();
6018 unsigned ElemBitSize = VT.getScalarSizeInBits();
6019
6020 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6021 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6022 APInt SplatBits, SplatUndef;
6023 unsigned SplatBitSize;
6024 bool HasAnyUndefs;
6025 // Check for constant splats. Use ElemBitSize as the minimum element
6026 // width and reject splats that need wider elements.
6027 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6028 ElemBitSize, true) &&
6029 SplatBitSize == ElemBitSize) {
6030 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6031 DL, MVT::i32);
6032 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6033 }
6034 // Check for variable splats.
6035 BitVector UndefElements;
6036 SDValue Splat = BVN->getSplatValue(&UndefElements);
6037 if (Splat) {
6038 // Since i32 is the smallest legal type, we either need a no-op
6039 // or a truncation.
6040 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6041 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6042 }
6043 }
6044
6045 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6046 // and the shift amount is directly available in a GPR.
6047 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6048 if (VSN->isSplat()) {
6049 SDValue VSNOp0 = VSN->getOperand(0);
6050 unsigned Index = VSN->getSplatIndex();
6052 "Splat index should be defined and in first operand");
6053 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6054 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6055 // Since i32 is the smallest legal type, we either need a no-op
6056 // or a truncation.
6057 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6058 VSNOp0.getOperand(Index));
6059 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6060 }
6061 }
6062 }
6063
6064 // Otherwise just treat the current form as legal.
6065 return Op;
6066}
6067
6069 SDLoc dl(Op);
6070 SDValue Src = Op.getOperand(0);
6071 MVT DstVT = Op.getSimpleValueType();
6072
6073 AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());
6074 unsigned SrcAS = N->getSrcAddressSpace();
6075
6076 assert(SrcAS != N->getDestAddressSpace() &&
6077 "addrspacecast must be between different address spaces");
6078
6079 // addrspacecast [0 <- 1] : Assigning a ptr32 value to a 64-bit pointer.
6080 // addrspacecast [1 <- 0] : Assigning a 64-bit pointer to a ptr32 value.
6081 if (SrcAS == SYSTEMZAS::PTR32 && DstVT == MVT::i64) {
6082 Op = DAG.getNode(ISD::AND, dl, MVT::i32, Src,
6083 DAG.getConstant(0x7fffffff, dl, MVT::i32));
6084 Op = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op);
6085 } else if (DstVT == MVT::i32) {
6086 Op = DAG.getNode(ISD::TRUNCATE, dl, DstVT, Src);
6087 Op = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
6088 DAG.getConstant(0x7fffffff, dl, MVT::i32));
6089 Op = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op);
6090 } else {
6091 report_fatal_error("Bad address space in addrspacecast");
6092 }
6093 return Op;
6094}
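// The 0x7fffffff masks above reflect that ptr32 values are 31-bit addresses
// (the z/OS 31-bit addressing convention), so the most significant bit of
// the 32-bit representation is cleared when converting in either direction.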
6095
6096SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
6097 SelectionDAG &DAG) const {
6098 SDLoc DL(Op);
6099 MVT ResultVT = Op.getSimpleValueType();
6100 SDValue Arg = Op.getOperand(0);
6101 unsigned Check = Op.getConstantOperandVal(1);
6102
6103 unsigned TDCMask = 0;
6104 if (Check & fcSNan)
6105 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
6106 if (Check & fcQNan)
6107 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
6108 if (Check & fcPosInf)
6109 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
6110 if (Check & fcNegInf)
6111 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
6112 if (Check & fcPosNormal)
6113 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
6114 if (Check & fcNegNormal)
6115 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
6116 if (Check & fcPosSubnormal)
6117 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
6118 if (Check & fcNegSubnormal)
6119 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
6120 if (Check & fcPosZero)
6121 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
6122 if (Check & fcNegZero)
6123 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
6124 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
6125
6126 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
6127 return getCCResult(DAG, Intr);
6128}
6129
6130SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
6131 SelectionDAG &DAG) const {
6132 SDLoc DL(Op);
6133 SDValue Chain = Op.getOperand(0);
6134
6135 // STCKF only supports a memory operand, so we have to use a temporary.
6136 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
6137 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6138 MachinePointerInfo MPI =
6139 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
6140
6141 // Use STCKF to store the TOD clock into the temporary.
6142 SDValue StoreOps[] = {Chain, StackPtr};
6143 Chain = DAG.getMemIntrinsicNode(
6144 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
6146
6147 // And read it back from there.
6148 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
6149}
6150
6151SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
6152 SelectionDAG &DAG) const {
6153 switch (Op.getOpcode()) {
6154 case ISD::FRAMEADDR:
6155 return lowerFRAMEADDR(Op, DAG);
6156 case ISD::RETURNADDR:
6157 return lowerRETURNADDR(Op, DAG);
6158 case ISD::BR_CC:
6159 return lowerBR_CC(Op, DAG);
6160 case ISD::SELECT_CC:
6161 return lowerSELECT_CC(Op, DAG);
6162 case ISD::SETCC:
6163 return lowerSETCC(Op, DAG);
6164 case ISD::STRICT_FSETCC:
6165 return lowerSTRICT_FSETCC(Op, DAG, false);
6166 case ISD::STRICT_FSETCCS:
6167 return lowerSTRICT_FSETCC(Op, DAG, true);
6168 case ISD::GlobalAddress:
6169 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
6170 case ISD::GlobalTLSAddress:
6171 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
6172 case ISD::BlockAddress:
6173 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
6174 case ISD::JumpTable:
6175 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
6176 case ISD::ConstantPool:
6177 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
6178 case ISD::BITCAST:
6179 return lowerBITCAST(Op, DAG);
6180 case ISD::VASTART:
6181 return lowerVASTART(Op, DAG);
6182 case ISD::VACOPY:
6183 return lowerVACOPY(Op, DAG);
6184 case ISD::DYNAMIC_STACKALLOC:
6185 return lowerDYNAMIC_STACKALLOC(Op, DAG);
6186 case ISD::GET_DYNAMIC_AREA_OFFSET:
6187 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
6188 case ISD::SMUL_LOHI:
6189 return lowerSMUL_LOHI(Op, DAG);
6190 case ISD::UMUL_LOHI:
6191 return lowerUMUL_LOHI(Op, DAG);
6192 case ISD::SDIVREM:
6193 return lowerSDIVREM(Op, DAG);
6194 case ISD::UDIVREM:
6195 return lowerUDIVREM(Op, DAG);
6196 case ISD::SADDO:
6197 case ISD::SSUBO:
6198 case ISD::UADDO:
6199 case ISD::USUBO:
6200 return lowerXALUO(Op, DAG);
6201 case ISD::UADDO_CARRY:
6202 case ISD::USUBO_CARRY:
6203 return lowerUADDSUBO_CARRY(Op, DAG);
6204 case ISD::OR:
6205 return lowerOR(Op, DAG);
6206 case ISD::CTPOP:
6207 return lowerCTPOP(Op, DAG);
6208 case ISD::VECREDUCE_ADD:
6209 return lowerVECREDUCE_ADD(Op, DAG);
6210 case ISD::ATOMIC_FENCE:
6211 return lowerATOMIC_FENCE(Op, DAG);
6212 case ISD::ATOMIC_SWAP:
6213 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
6214 case ISD::ATOMIC_STORE:
6215 case ISD::ATOMIC_LOAD:
6216 return lowerATOMIC_LDST_I128(Op, DAG);
6217 case ISD::ATOMIC_LOAD_ADD:
6218 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
6219 case ISD::ATOMIC_LOAD_SUB:
6220 return lowerATOMIC_LOAD_SUB(Op, DAG);
6221 case ISD::ATOMIC_LOAD_AND:
6222 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
6223 case ISD::ATOMIC_LOAD_OR:
6224 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
6225 case ISD::ATOMIC_LOAD_XOR:
6226 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
6227 case ISD::ATOMIC_LOAD_NAND:
6228 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
6229 case ISD::ATOMIC_LOAD_MIN:
6230 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
6231 case ISD::ATOMIC_LOAD_MAX:
6232 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
6233 case ISD::ATOMIC_LOAD_UMIN:
6234 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
6235 case ISD::ATOMIC_LOAD_UMAX:
6236 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
6237 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
6238 return lowerATOMIC_CMP_SWAP(Op, DAG);
6239 case ISD::STACKSAVE:
6240 return lowerSTACKSAVE(Op, DAG);
6241 case ISD::STACKRESTORE:
6242 return lowerSTACKRESTORE(Op, DAG);
6243 case ISD::PREFETCH:
6244 return lowerPREFETCH(Op, DAG);
6245 case ISD::INTRINSIC_W_CHAIN:
6246 return lowerINTRINSIC_W_CHAIN(Op, DAG);
6247 case ISD::INTRINSIC_WO_CHAIN:
6248 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
6249 case ISD::BUILD_VECTOR:
6250 return lowerBUILD_VECTOR(Op, DAG);
6251 case ISD::VECTOR_SHUFFLE:
6252 return lowerVECTOR_SHUFFLE(Op, DAG);
6253 case ISD::SCALAR_TO_VECTOR:
6254 return lowerSCALAR_TO_VECTOR(Op, DAG);
6255 case ISD::INSERT_VECTOR_ELT:
6256 return lowerINSERT_VECTOR_ELT(Op, DAG);
6257 case ISD::EXTRACT_VECTOR_ELT:
6258 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6259 case ISD::SIGN_EXTEND_VECTOR_INREG:
6260 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
6261 case ISD::ZERO_EXTEND_VECTOR_INREG:
6262 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
6263 case ISD::SHL:
6264 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
6265 case ISD::SRL:
6266 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
6267 case ISD::SRA:
6268 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
6269 case ISD::ADDRSPACECAST:
6270 return lowerAddrSpaceCast(Op, DAG);
6271 case ISD::ROTL:
6272 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
6273 case ISD::IS_FPCLASS:
6274 return lowerIS_FPCLASS(Op, DAG);
6275 case ISD::GET_ROUNDING:
6276 return lowerGET_ROUNDING(Op, DAG);
6277 case ISD::READCYCLECOUNTER:
6278 return lowerREADCYCLECOUNTER(Op, DAG);
6279 default:
6280 llvm_unreachable("Unexpected node to lower");
6281 }
6282}
6283
6284static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
6285 const SDLoc &SL) {
6286 // If i128 is legal, just use a normal bitcast.
6287 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
6288 return DAG.getBitcast(MVT::f128, Src);
6289
6290 // Otherwise, f128 must live in FP128, so do a partwise move.
6291 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
6292 &SystemZ::FP128BitRegClass);
6293
6294 SDValue Hi, Lo;
6295 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
6296
6297 Hi = DAG.getBitcast(MVT::f64, Hi);
6298 Lo = DAG.getBitcast(MVT::f64, Lo);
6299
6300 SDNode *Pair = DAG.getMachineNode(
6301 SystemZ::REG_SEQUENCE, SL, MVT::f128,
6302 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
6303 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
6304 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
6305 return SDValue(Pair, 0);
6306}
6307
6308static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
6309 const SDLoc &SL) {
6310 // If i128 is legal, just use a normal bitcast.
6311 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
6312 return DAG.getBitcast(MVT::i128, Src);
6313
6314 // Otherwise, f128 must live in FP128, so do a partwise move.
6315 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
6316 &SystemZ::FP128BitRegClass);
6317
6318 SDValue LoFP =
6319 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
6320 SDValue HiFP =
6321 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
6322 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
6323 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
6324
6325 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
6326}
6327
6328// Lower operations with invalid operand or result types (currently used
6329// only for 128-bit integer types).
6330void
6331SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
6332 SmallVectorImpl<SDValue> &Results,
6333 SelectionDAG &DAG) const {
6334 switch (N->getOpcode()) {
6335 case ISD::ATOMIC_LOAD: {
6336 SDLoc DL(N);
6337 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
6338 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
6339 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6340 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
6341 DL, Tys, Ops, MVT::i128, MMO);
6342
6343 SDValue Lowered = lowerGR128ToI128(DAG, Res);
6344 if (N->getValueType(0) == MVT::f128)
6345 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
6346 Results.push_back(Lowered);
6347 Results.push_back(Res.getValue(1));
6348 break;
6349 }
6350 case ISD::ATOMIC_STORE: {
6351 SDLoc DL(N);
6352 SDVTList Tys = DAG.getVTList(MVT::Other);
6353 SDValue Val = N->getOperand(1);
6354 if (Val.getValueType() == MVT::f128)
6355 Val = expandBitCastF128ToI128(DAG, Val, DL);
6356 Val = lowerI128ToGR128(DAG, Val);
6357
6358 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
6359 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6360 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
6361 DL, Tys, Ops, MVT::i128, MMO);
6362 // We have to enforce sequential consistency by performing a
6363 // serialization operation after the store.
6364 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
6365 AtomicOrdering::SequentiallyConsistent)
6366 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
6367 MVT::Other, Res), 0);
6368 Results.push_back(Res);
6369 break;
6370 }
6371 case ISD::ATOMIC_CMP_SWAP: {
6372 SDLoc DL(N);
6373 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
6374 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
6375 lowerI128ToGR128(DAG, N->getOperand(2)),
6376 lowerI128ToGR128(DAG, N->getOperand(3)) };
6377 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6378 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
6379 DL, Tys, Ops, MVT::i128, MMO);
6380 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
6381 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
6382 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
6383 Results.push_back(lowerGR128ToI128(DAG, Res));
6384 Results.push_back(Success);
6385 Results.push_back(Res.getValue(2));
6386 break;
6387 }
6388 case ISD::BITCAST: {
6389 SDValue Src = N->getOperand(0);
6390 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
6391 !useSoftFloat()) {
6392 SDLoc DL(N);
6393 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
6394 }
6395 break;
6396 }
6397 default:
6398 llvm_unreachable("Unexpected node to lower");
6399 }
6400}
6401
6402void
6403SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
6404 SmallVectorImpl<SDValue> &Results,
6405 SelectionDAG &DAG) const {
6406 return LowerOperationWrapper(N, Results, DAG);
6407}
6408
6409const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
6410#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
6411 switch ((SystemZISD::NodeType)Opcode) {
6412 case SystemZISD::FIRST_NUMBER: break;
6413 OPCODE(RET_GLUE);
6414 OPCODE(CALL);
6415 OPCODE(SIBCALL);
6416 OPCODE(TLS_GDCALL);
6417 OPCODE(TLS_LDCALL);
6418 OPCODE(PCREL_WRAPPER);
6419 OPCODE(PCREL_OFFSET);
6420 OPCODE(ICMP);
6421 OPCODE(FCMP);
6422 OPCODE(STRICT_FCMP);
6423 OPCODE(STRICT_FCMPS);
6424 OPCODE(TM);
6425 OPCODE(BR_CCMASK);
6426 OPCODE(SELECT_CCMASK);
6427 OPCODE(ADJDYNALLOC);
6428 OPCODE(PROBED_ALLOCA);
6429 OPCODE(POPCNT);
6430 OPCODE(SMUL_LOHI);
6431 OPCODE(UMUL_LOHI);
6432 OPCODE(SDIVREM);
6433 OPCODE(UDIVREM);
6434 OPCODE(SADDO);
6435 OPCODE(SSUBO);
6436 OPCODE(UADDO);
6437 OPCODE(USUBO);
6438 OPCODE(ADDCARRY);
6439 OPCODE(SUBCARRY);
6440 OPCODE(GET_CCMASK);
6441 OPCODE(MVC);
6442 OPCODE(NC);
6443 OPCODE(OC);
6444 OPCODE(XC);
6445 OPCODE(CLC);
6446 OPCODE(MEMSET_MVC);
6447 OPCODE(STPCPY);
6448 OPCODE(STRCMP);
6449 OPCODE(SEARCH_STRING);
6450 OPCODE(IPM);
6451 OPCODE(TBEGIN);
6452 OPCODE(TBEGIN_NOFLOAT);
6453 OPCODE(TEND);
6454 OPCODE(BYTE_MASK);
6455 OPCODE(ROTATE_MASK);
6456 OPCODE(REPLICATE);
6457 OPCODE(JOIN_DWORDS);
6458 OPCODE(SPLAT);
6459 OPCODE(MERGE_HIGH);
6460 OPCODE(MERGE_LOW);
6461 OPCODE(SHL_DOUBLE);
6462 OPCODE(PERMUTE_DWORDS);
6463 OPCODE(PERMUTE);
6464 OPCODE(PACK);
6465 OPCODE(PACKS_CC);
6466 OPCODE(PACKLS_CC);
6467 OPCODE(UNPACK_HIGH);
6468 OPCODE(UNPACKL_HIGH);
6469 OPCODE(UNPACK_LOW);
6470 OPCODE(UNPACKL_LOW);
6471 OPCODE(VSHL_BY_SCALAR);
6472 OPCODE(VSRL_BY_SCALAR);
6473 OPCODE(VSRA_BY_SCALAR);
6474 OPCODE(VROTL_BY_SCALAR);
6475 OPCODE(VSUM);
6476 OPCODE(VACC);
6477 OPCODE(VSCBI);
6478 OPCODE(VAC);
6479 OPCODE(VSBI);
6480 OPCODE(VACCC);
6481 OPCODE(VSBCBI);
6482 OPCODE(VICMPE);
6483 OPCODE(VICMPH);
6484 OPCODE(VICMPHL);
6485 OPCODE(VICMPES);
6486 OPCODE(VICMPHS);
6487 OPCODE(VICMPHLS);
6488 OPCODE(VFCMPE);
6489 OPCODE(STRICT_VFCMPE);
6490 OPCODE(STRICT_VFCMPES);
6491 OPCODE(VFCMPH);
6492 OPCODE(STRICT_VFCMPH);
6493 OPCODE(STRICT_VFCMPHS);
6494 OPCODE(VFCMPHE);
6495 OPCODE(STRICT_VFCMPHE);
6496 OPCODE(STRICT_VFCMPHES);
6497 OPCODE(VFCMPES);
6498 OPCODE(VFCMPHS);
6499 OPCODE(VFCMPHES);
6500 OPCODE(VFTCI);
6501 OPCODE(VEXTEND);
6502 OPCODE(STRICT_VEXTEND);
6503 OPCODE(VROUND);
6504 OPCODE(STRICT_VROUND);
6505 OPCODE(VTM);
6506 OPCODE(SCMP128HI);
6507 OPCODE(UCMP128HI);
6508 OPCODE(VFAE_CC);
6509 OPCODE(VFAEZ_CC);
6510 OPCODE(VFEE_CC);
6511 OPCODE(VFEEZ_CC);
6512 OPCODE(VFENE_CC);
6513 OPCODE(VFENEZ_CC);
6514 OPCODE(VISTR_CC);
6515 OPCODE(VSTRC_CC);
6516 OPCODE(VSTRCZ_CC);
6517 OPCODE(VSTRS_CC);
6518 OPCODE(VSTRSZ_CC);
6519 OPCODE(TDC);
6520 OPCODE(ATOMIC_SWAPW);
6521 OPCODE(ATOMIC_LOADW_ADD);
6522 OPCODE(ATOMIC_LOADW_SUB);
6523 OPCODE(ATOMIC_LOADW_AND);
6524 OPCODE(ATOMIC_LOADW_OR);
6525 OPCODE(ATOMIC_LOADW_XOR);
6526 OPCODE(ATOMIC_LOADW_NAND);
6527 OPCODE(ATOMIC_LOADW_MIN);
6528 OPCODE(ATOMIC_LOADW_MAX);
6529 OPCODE(ATOMIC_LOADW_UMIN);
6530 OPCODE(ATOMIC_LOADW_UMAX);
6531 OPCODE(ATOMIC_CMP_SWAPW);
6532 OPCODE(ATOMIC_CMP_SWAP);
6533 OPCODE(ATOMIC_LOAD_128);
6534 OPCODE(ATOMIC_STORE_128);
6535 OPCODE(ATOMIC_CMP_SWAP_128);
6536 OPCODE(LRV);
6537 OPCODE(STRV);
6538 OPCODE(VLER);
6539 OPCODE(VSTER);
6540 OPCODE(STCKF);
6542 OPCODE(ADA_ENTRY);
6543 }
6544 return nullptr;
6545#undef OPCODE
6546}
6547
6548// Return true if VT is a vector whose elements are a whole number of bytes
6549// in width. Also check for presence of vector support.
6550bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
6551 if (!Subtarget.hasVector())
6552 return false;
6553
6554 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
6555}
6556
6557// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
6558// producing a result of type ResVT. Op is a possibly bitcast version
6559// of the input vector and Index is the index (based on type VecVT) that
6560// should be extracted. Return the new extraction if a simplification
6561// was possible or if Force is true.
6562SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
6563 EVT VecVT, SDValue Op,
6564 unsigned Index,
6565 DAGCombinerInfo &DCI,
6566 bool Force) const {
6567 SelectionDAG &DAG = DCI.DAG;
6568
6569 // The number of bytes being extracted.
6570 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6571
6572 for (;;) {
6573 unsigned Opcode = Op.getOpcode();
6574 if (Opcode == ISD::BITCAST)
6575 // Look through bitcasts.
6576 Op = Op.getOperand(0);
6577 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
6578 canTreatAsByteVector(Op.getValueType())) {
6579 // Get a VPERM-like permute mask and see whether the bytes covered
6580 // by the extracted element are a contiguous sequence from one
6581 // source operand.
6582 SmallVector<int, SystemZ::VectorBytes> Bytes;
6583 if (!getVPermMask(Op, Bytes))
6584 break;
6585 int First;
6586 if (!getShuffleInput(Bytes, Index * BytesPerElement,
6587 BytesPerElement, First))
6588 break;
6589 if (First < 0)
6590 return DAG.getUNDEF(ResVT);
6591 // Make sure the contiguous sequence starts at a multiple of the
6592 // original element size.
6593 unsigned Byte = unsigned(First) % Bytes.size();
6594 if (Byte % BytesPerElement != 0)
6595 break;
6596 // We can get the extracted value directly from an input.
6597 Index = Byte / BytesPerElement;
6598 Op = Op.getOperand(unsigned(First) / Bytes.size());
6599 Force = true;
6600 } else if (Opcode == ISD::BUILD_VECTOR &&
6601 canTreatAsByteVector(Op.getValueType())) {
6602 // We can only optimize this case if the BUILD_VECTOR elements are
6603 // at least as wide as the extracted value.
6604 EVT OpVT = Op.getValueType();
6605 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6606 if (OpBytesPerElement < BytesPerElement)
6607 break;
6608 // Make sure that the least-significant bit of the extracted value
6609 // is the least significant bit of an input.
6610 unsigned End = (Index + 1) * BytesPerElement;
6611 if (End % OpBytesPerElement != 0)
6612 break;
6613 // We're extracting the low part of one operand of the BUILD_VECTOR.
6614 Op = Op.getOperand(End / OpBytesPerElement - 1);
6615 if (!Op.getValueType().isInteger()) {
6616 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
6617 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
6618 DCI.AddToWorklist(Op.getNode());
6619 }
6620 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
6621 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
6622 if (VT != ResVT) {
6623 DCI.AddToWorklist(Op.getNode());
6624 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
6625 }
6626 return Op;
6627 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6629 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6630 canTreatAsByteVector(Op.getValueType()) &&
6631 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
6632 // Make sure that only the unextended bits are significant.
6633 EVT ExtVT = Op.getValueType();
6634 EVT OpVT = Op.getOperand(0).getValueType();
6635 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
6636 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6637 unsigned Byte = Index * BytesPerElement;
6638 unsigned SubByte = Byte % ExtBytesPerElement;
6639 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
6640 if (SubByte < MinSubByte ||
6641 SubByte + BytesPerElement > ExtBytesPerElement)
6642 break;
6643 // Get the byte offset of the unextended element
6644 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
6645 // ...then add the byte offset relative to that element.
6646 Byte += SubByte - MinSubByte;
6647 if (Byte % BytesPerElement != 0)
6648 break;
6649 Op = Op.getOperand(0);
6650 Index = Byte / BytesPerElement;
6651 Force = true;
6652 } else
6653 break;
6654 }
6655 if (Force) {
6656 if (Op.getValueType() != VecVT) {
6657 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
6658 DCI.AddToWorklist(Op.getNode());
6659 }
6660 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
6661 DAG.getConstant(Index, DL, MVT::i32));
6662 }
6663 return SDValue();
6664}
6665
6666// Optimize vector operations in scalar value Op on the basis that Op
6667// is truncated to TruncVT.
6668SDValue SystemZTargetLowering::combineTruncateExtract(
6669 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
6670 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
6671 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
6672 // of type TruncVT.
6673 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6674 TruncVT.getSizeInBits() % 8 == 0) {
6675 SDValue Vec = Op.getOperand(0);
6676 EVT VecVT = Vec.getValueType();
6677 if (canTreatAsByteVector(VecVT)) {
6678 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
6679 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6680 unsigned TruncBytes = TruncVT.getStoreSize();
6681 if (BytesPerElement % TruncBytes == 0) {
6682 // Calculate the value of Y' in the above description. We are
6683 // splitting the original elements into Scale equal-sized pieces
6684 // and for truncation purposes want the last (least-significant)
6685 // of these pieces for IndexN. This is easiest to do by calculating
6686 // the start index of the following element and then subtracting 1.
6687 unsigned Scale = BytesPerElement / TruncBytes;
6688 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
6689
6690 // Defer the creation of the bitcast from X to combineExtract,
6691 // which might be able to optimize the extraction.
6692 VecVT = EVT::getVectorVT(*DCI.DAG.getContext(),
6693 MVT::getIntegerVT(TruncBytes * 8),
6694 VecVT.getStoreSize() / TruncBytes);
6695 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
6696 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
6697 }
6698 }
6699 }
6700 }
6701 return SDValue();
6702}
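// For example, truncating (extract_vector_elt v4i32 X, 1) to i8 turns into
// (extract_vector_elt (bitcast X to v16i8), 7): Scale == 4 and NewIndex
// selects the last (least significant) of the four byte-sized pieces of
// element 1. ResVT is i32 rather than i8 because i32 is the smallest legal
// scalar type here.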
6703
6704SDValue SystemZTargetLowering::combineZERO_EXTEND(
6705 SDNode *N, DAGCombinerInfo &DCI) const {
6706 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
6707 SelectionDAG &DAG = DCI.DAG;
6708 SDValue N0 = N->getOperand(0);
6709 EVT VT = N->getValueType(0);
6710 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
6711 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
6712 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6713 if (TrueOp && FalseOp) {
6714 SDLoc DL(N0);
6715 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
6716 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
6717 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
6718 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
6719 // If N0 has multiple uses, change other uses as well.
6720 if (!N0.hasOneUse()) {
6721 SDValue TruncSelect =
6722 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
6723 DCI.CombineTo(N0.getNode(), TruncSelect);
6724 }
6725 return NewSelect;
6726 }
6727 }
6728 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
6729 // of the result is smaller than the size of X and all the truncated bits
6730 // of X are already zero.
6731 if (N0.getOpcode() == ISD::XOR &&
6732 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
6733 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6734 N0.getOperand(1).getOpcode() == ISD::Constant) {
6735 SDValue X = N0.getOperand(0).getOperand(0);
6736 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
6737 KnownBits Known = DAG.computeKnownBits(X);
6738 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
6739 N0.getValueSizeInBits(),
6740 VT.getSizeInBits());
6741 if (TruncatedBits.isSubsetOf(Known.Zero)) {
6742 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
6744 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
6745 X, DAG.getConstant(Mask, SDLoc(N0), VT));
6746 }
6747 }
6748 }
6749
6750 return SDValue();
6751}
6752
6753SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
6754 SDNode *N, DAGCombinerInfo &DCI) const {
6755 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
6756 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
6757 // into (select_cc LHS, RHS, -1, 0, COND)
6758 SelectionDAG &DAG = DCI.DAG;
6759 SDValue N0 = N->getOperand(0);
6760 EVT VT = N->getValueType(0);
6761 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
6762 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
6763 N0 = N0.getOperand(0);
6764 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
6765 SDLoc DL(N0);
6766 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
6767 DAG.getAllOnesConstant(DL, VT),
6768 DAG.getConstant(0, DL, VT), N0.getOperand(2) };
6769 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
6770 }
6771 return SDValue();
6772}
6773
6774SDValue SystemZTargetLowering::combineSIGN_EXTEND(
6775 SDNode *N, DAGCombinerInfo &DCI) const {
6776 // Convert (sext (ashr (shl X, C1), C2)) to
6777 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
6778 // cheap as narrower ones.
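// For example, (i64 sext (i32 ashr (i32 shl X, 24), 24)) becomes
// (i64 ashr (i64 shl (anyext X), 56), 56): Extra is 64 - 32 = 32, so both
// shift amounts grow by 32.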
6779 SelectionDAG &DAG = DCI.DAG;
6780 SDValue N0 = N->getOperand(0);
6781 EVT VT = N->getValueType(0);
6782 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
6783 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6784 SDValue Inner = N0.getOperand(0);
6785 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
6786 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
6787 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
6788 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
6789 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
6790 EVT ShiftVT = N0.getOperand(1).getValueType();
6791 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
6792 Inner.getOperand(0));
6793 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
6794 DAG.getConstant(NewShlAmt, SDLoc(Inner),
6795 ShiftVT));
6796 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
6797 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
6798 }
6799 }
6800 }
6801
6802 return SDValue();
6803}
6804
6805SDValue SystemZTargetLowering::combineMERGE(
6806 SDNode *N, DAGCombinerInfo &DCI) const {
6807 SelectionDAG &DAG = DCI.DAG;
6808 unsigned Opcode = N->getOpcode();
6809 SDValue Op0 = N->getOperand(0);
6810 SDValue Op1 = N->getOperand(1);
6811 if (Op0.getOpcode() == ISD::BITCAST)
6812 Op0 = Op0.getOperand(0);
6813 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
6814 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
6815 // for v4f32.
6816 if (Op1 == N->getOperand(0))
6817 return Op1;
6818 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
6819 EVT VT = Op1.getValueType();
6820 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
6821 if (ElemBytes <= 4) {
6822 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
6823 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
6824 EVT InVT = VT.changeVectorElementTypeToInteger();
6825 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
6826 SystemZ::VectorBytes / ElemBytes / 2);
6827 if (VT != InVT) {
6828 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
6829 DCI.AddToWorklist(Op1.getNode());
6830 }
6831 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
6832 DCI.AddToWorklist(Op.getNode());
6833 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
6834 }
6835 }
6836 return SDValue();
6837}
6838
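// Return true if the i128 value loaded by LD is used only through truncation
// to i64, either of the value itself (low part) or of the value shifted
// right by 64 (high part). The truncating users are returned in LoPart and
// HiPart (either may be null if that half is unused).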
6839static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
6840 SDNode *&HiPart) {
6841 LoPart = HiPart = nullptr;
6842
6843 // Scan through all users.
6844 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6845 UI != UIEnd; ++UI) {
6846 // Skip the uses of the chain.
6847 if (UI.getUse().getResNo() != 0)
6848 continue;
6849
6850 // Verify every user is a TRUNCATE to i64 of the low or high half.
6851 SDNode *User = *UI;
6852 bool IsLoPart = true;
6853 if (User->getOpcode() == ISD::SRL &&
6854 User->getOperand(1).getOpcode() == ISD::Constant &&
6855 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
6856 User = *User->use_begin();
6857 IsLoPart = false;
6858 }
6859 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
6860 return false;
6861
6862 if (IsLoPart) {
6863 if (LoPart)
6864 return false;
6865 LoPart = User;
6866 } else {
6867 if (HiPart)
6868 return false;
6869 HiPart = User;
6870 }
6871 }
6872 return true;
6873}
6874
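// Likewise for an f128 load: return true if every user is an EXTRACT_SUBREG
// of subreg_l64 or subreg_h64, returning those users in LoPart and HiPart.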
6875static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
6876 SDNode *&HiPart) {
6877 LoPart = HiPart = nullptr;
6878
6879 // Scan through all users.
6880 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6881 UI != UIEnd; ++UI) {
6882 // Skip the uses of the chain.
6883 if (UI.getUse().getResNo() != 0)
6884 continue;
6885
6886 // Verify every user is an EXTRACT_SUBREG of the low or high half.
6887 SDNode *User = *UI;
6888 if (!User->hasOneUse() || !User->isMachineOpcode() ||
6889 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
6890 return false;
6891
6892 switch (User->getConstantOperandVal(1)) {
6893 case SystemZ::subreg_l64:
6894 if (LoPart)
6895 return false;
6896 LoPart = User;
6897 break;
6898 case SystemZ::subreg_h64:
6899 if (HiPart)
6900 return false;
6901 HiPart = User;
6902 break;
6903 default:
6904 return false;
6905 }
6906 }
6907 return true;
6908}
6909
6910SDValue SystemZTargetLowering::combineLOAD(
6911 SDNode *N, DAGCombinerInfo &DCI) const {
6912 SelectionDAG &DAG = DCI.DAG;
6913 EVT LdVT = N->getValueType(0);
6914 if (auto *LN = dyn_cast<LoadSDNode>(N)) {
6915 if (LN->getAddressSpace() == SYSTEMZAS::PTR32) {
6916 MVT PtrVT = getPointerTy(DAG.getDataLayout());
6917 MVT LoadNodeVT = LN->getBasePtr().getSimpleValueType();
6918 if (PtrVT != LoadNodeVT) {
6919 SDLoc DL(LN);
6920 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(
6921 DL, PtrVT, LN->getBasePtr(), SYSTEMZAS::PTR32, 0);
6922 return DAG.getExtLoad(LN->getExtensionType(), DL, LN->getValueType(0),
6923 LN->getChain(), AddrSpaceCast, LN->getMemoryVT(),
6924 LN->getMemOperand());
6925 }
6926 }
6927 }
6928 SDLoc DL(N);
6929
6930 // Replace a 128-bit load that is used solely to move its value into GPRs
6931 // by separate loads of both halves.
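// For example, an i128 load whose only users truncate it to i64 (directly,
// and via an SRL by 64) is rewritten as two i64 loads at offsets 8 and 0,
// feeding the low and high extractions respectively.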
6932 LoadSDNode *LD = cast<LoadSDNode>(N);
6933 if (LD->isSimple() && ISD::isNormalLoad(LD)) {
6934 SDNode *LoPart, *HiPart;
6935 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
6936 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
6937 // Rewrite each extraction as an independent load.
6938 SmallVector<SDValue, 2> ArgChains;
6939 if (HiPart) {
6940 SDValue EltLoad = DAG.getLoad(
6941 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
6942 LD->getPointerInfo(), LD->getOriginalAlign(),
6943 LD->getMemOperand()->getFlags(), LD->getAAInfo());
6944
6945 DCI.CombineTo(HiPart, EltLoad, true);
6946 ArgChains.push_back(EltLoad.getValue(1));
6947 }
6948 if (LoPart) {
6949 SDValue EltLoad = DAG.getLoad(
6950 LoPart->getValueType(0), DL, LD->getChain(),
6951 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
6952 LD->getPointerInfo().getWithOffset(8), LD->getOriginalAlign(),
6953 LD->getMemOperand()->getFlags(), LD->getAAInfo());
6954
6955 DCI.CombineTo(LoPart, EltLoad, true);
6956 ArgChains.push_back(EltLoad.getValue(1));
6957 }
6958
6959 // Collect all chains via TokenFactor.
6960 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
6961 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
6962 DCI.AddToWorklist(Chain.getNode());
6963 return SDValue(N, 0);
6964 }
6965 }
6966
6967 if (LdVT.isVector() || LdVT.isInteger())
6968 return SDValue();
6969 // Transform a scalar load that is REPLICATEd as well as having other
6970 // use(s) to the form where the other use(s) use the first element of the
6971 // REPLICATE instead of the load. Otherwise instruction selection will not
6972 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
6973 // point loads.
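// For example, an f64 load feeding both a v2f64 REPLICATE and a scalar use
// is changed so that the scalar use extracts element 0 of the REPLICATE,
// letting the load and splat fold into a single VLREP.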
6974
6975 SDValue Replicate;
6976 SmallVector<SDNode*, 8> OtherUses;
6977 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6978 UI != UE; ++UI) {
6979 if (UI->getOpcode() == SystemZISD::REPLICATE) {
6980 if (Replicate)
6981 return SDValue(); // Should never happen
6982 Replicate = SDValue(*UI, 0);
6983 }
6984 else if (UI.getUse().getResNo() == 0)
6985 OtherUses.push_back(*UI);
6986 }
6987 if (!Replicate || OtherUses.empty())
6988 return SDValue();
6989
6990 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
6991 Replicate, DAG.getConstant(0, DL, MVT::i32));
6992 // Update uses of the loaded Value while preserving old chains.
6993 for (SDNode *U : OtherUses) {
6994 SmallVector<SDValue, 8> Ops;
6995 for (SDValue Op : U->ops())
6996 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
6997 DAG.UpdateNodeOperands(U, Ops);
6998 }
6999 return SDValue(N, 0);
7000}
7001
7002bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
7003 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
7004 return true;
7005 if (Subtarget.hasVectorEnhancements2())
7006 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
7007 return true;
7008 return false;
7009}
7010
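// Return true if shuffle mask M on the 128-bit vector type VT reverses the
// element order (undef indices are ignored); such shuffles correspond to the
// element-reversing VLER/VSTER operations.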
7011 static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
7012 if (!VT.isVector() || !VT.isSimple() ||
7013 VT.getSizeInBits() != 128 ||
7014 VT.getScalarSizeInBits() % 8 != 0)
7015 return false;
7016
7017 unsigned NumElts = VT.getVectorNumElements();
7018 for (unsigned i = 0; i < NumElts; ++i) {
7019 if (M[i] < 0) continue; // ignore UNDEF indices
7020 if ((unsigned) M[i] != NumElts - 1 - i)
7021 return false;
7022 }
7023
7024 return true;
7025}
7026
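// Return true if StoredVal is used only by stores of a plain power-of-two
// sized memory type of at most 16 bytes, possibly through splat
// BUILD_VECTORs that are themselves only used by such stores.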
7027static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
7028 for (auto *U : StoredVal->uses()) {
7029 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
7030 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
7031 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
7032 continue;
7033 } else if (isa<BuildVectorSDNode>(U)) {
7034 SDValue BuildVector = SDValue(U, 0);
7035 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
7036 isOnlyUsedByStores(BuildVector, DAG))
7037 continue;
7038 }
7039 return false;
7040 }
7041 return true;
7042}
7043
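// Return true if Val is an i128 assembled from two i64 GPRs as
// (or (zext i64 Lo), (shl (anyext i64 Hi), 64)); the originating i64 values
// are returned in LoPart and HiPart.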
7044static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
7045 SDValue &HiPart) {
7046 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
7047 return false;
7048
7049 SDValue Op0 = Val.getOperand(0);
7050 SDValue Op1 = Val.getOperand(1);
7051
7052 if (Op0.getOpcode() == ISD::SHL)
7053 std::swap(Op0, Op1);
7054 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
7055 Op1.getOperand(1).getOpcode() != ISD::Constant ||
7056 Op1.getConstantOperandVal(1) != 64)
7057 return false;
7058 Op1 = Op1.getOperand(0);
7059
7060 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
7061 Op0.getOperand(0).getValueType() != MVT::i64)
7062 return false;
7063 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
7064 Op1.getOperand(0).getValueType() != MVT::i64)
7065 return false;
7066
7067 LoPart = Op0.getOperand(0);
7068 HiPart = Op1.getOperand(0);
7069 return true;
7070}
7071
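// Likewise for f128: return true if Val is a REG_SEQUENCE that builds an
// FP128 register from its subreg_l64 and subreg_h64 halves, which are
// returned in LoPart and HiPart.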
7072static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
7073 SDValue &HiPart) {
7074 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
7075 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
7076 return false;
7077
7078 if (Val->getNumOperands() != 5 ||
7079 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
7080 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
7081 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
7082 return false;
7083
7084 LoPart = Val->getOperand(1);
7085 HiPart = Val->getOperand(3);
7086 return true;
7087}
7088
7089SDValue SystemZTargetLowering::combineSTORE(
7090 SDNode *N, DAGCombinerInfo &DCI) const {
7091 SelectionDAG &DAG = DCI.DAG;
7092 auto *SN = cast<StoreSDNode>(N);
7093 auto &Op1 = N->getOperand(1);
7094 EVT MemVT = SN->getMemoryVT();
7095
7096 if (SN->getAddressSpace() == SYSTEMZAS::PTR32) {
7097 MVT PtrVT = getPointerTy(DAG.getDataLayout());
7098 MVT StoreNodeVT = SN->getBasePtr().getSimpleValueType();
7099 if (PtrVT != StoreNodeVT) {
7100 SDLoc DL(SN);
7101 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(DL, PtrVT, SN->getBasePtr(),
7102 SYSTEMZAS::PTR32, 0);
7103 return DAG.getStore(SN->getChain(), DL, SN->getValue(), AddrSpaceCast,
7104 SN->getPointerInfo(), SN->getOriginalAlign(),
7105 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7106 }
7107 }
7108
7109 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
7110 // for the extraction to be done on a vMiN value, so that we can use VSTE.
7111 // If X has wider elements then convert it to:
7112 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
7113 if (MemVT.isInteger() && SN->isTruncatingStore()) {
7114 if (SDValue Value =
7115 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
7116 DCI.AddToWorklist(Value.getNode());
7117
7118 // Rewrite the store with the new form of stored value.
7119 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
7120 SN->getBasePtr(), SN->getMemoryVT(),
7121 SN->getMemOperand());
7122 }
7123 }
7124 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
7125 if (!SN->isTruncatingStore() &&
7126 Op1.getOpcode() == ISD::BSWAP &&
7127 Op1.getNode()->hasOneUse() &&
7128 canLoadStoreByteSwapped(Op1.getValueType())) {
7129
7130 SDValue BSwapOp = Op1.getOperand(0);
7131
7132 if (BSwapOp.getValueType() == MVT::i16)
7133 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
7134
7135 SDValue Ops[] = {
7136 N->getOperand(0), BSwapOp, N->getOperand(2)
7137 };
7138
7139 return
7140 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
7141 Ops, MemVT, SN->getMemOperand());
7142 }
7143 // Combine STORE (element-swap) into VSTER
7144 if (!SN->isTruncatingStore() &&
7145 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
7146 Op1.getNode()->hasOneUse() &&
7147 Subtarget.hasVectorEnhancements2()) {
7148 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
7149 ArrayRef<int> ShuffleMask = SVN->getMask();
7150 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
7151 SDValue Ops[] = {
7152 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
7153 };
7154
7155 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
7156 DAG.getVTList(MVT::Other),
7157 Ops, MemVT, SN->getMemOperand());
7158 }
7159 }
7160
7161 // Combine STORE (READCYCLECOUNTER) into STCKF.
7162 if (!SN->isTruncatingStore() &&
7163 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
7164 Op1.hasOneUse() &&
7165 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
7166 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
7167 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
7168 DAG.getVTList(MVT::Other),
7169 Ops, MemVT, SN->getMemOperand());
7170 }
7171
7172 // Transform a store of a 128-bit value moved from parts into two stores.
7173 if (SN->isSimple() && ISD::isNormalStore(SN)) {
7174 SDValue LoPart, HiPart;
7175 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
7176 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
7177 SDLoc DL(SN);
7178 SDValue Chain0 =
7179 DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),
7180 SN->getPointerInfo(), SN->getOriginalAlign(),
7181 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7182 SDValue Chain1 =
7183 DAG.getStore(SN->getChain(), DL, LoPart,
7184 DAG.getObjectPtrOffset(DL, SN->getBasePtr(),
7185 TypeSize::getFixed(8)),
7186 SN->getPointerInfo().getWithOffset(8),
7187 SN->getOriginalAlign(),
7188 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7189
7190 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
7191 }
7192 }
7193
7194 // Replicate a reg or immediate with VREP instead of scalar multiply or
7195 // immediate load. It seems best to do this during the first DAGCombine as
7196 // it is straightforward to handle the zero-extend node in the initial
7197 // DAG, and also not worry about keeping the new MemVT legal (e.g. when
7198 // extracting an i16 element from a v16i8 vector).
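// For example, an i64 store of (mul (zext i16 X), 0x0001000100010001) is
// really a splat of X into four halfwords, so it becomes a store of a v4i16
// splat that can be materialized with VREP instead of a scalar multiply.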
7199 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
7200 isOnlyUsedByStores(Op1, DAG)) {
7201 SDValue Word = SDValue();
7202 EVT WordVT;
7203
7204 // Find a replicated immediate and return it if found in Word and its
7205 // type in WordVT.
7206 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
7207 // Some constants are better handled with a scalar store.
7208 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
7209 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
7210 return;
7211 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue()));
7212 if (VCI.isVectorConstantLegal(Subtarget) &&
7213 VCI.Opcode == SystemZISD::REPLICATE) {
7214 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
7215 WordVT = VCI.VecVT.getScalarType();
7216 }
7217 };
7218
7219 // Find a replicated register and return it if found in Word and its type
7220 // in WordVT.
7221 auto FindReplicatedReg = [&](SDValue MulOp) {
7222 EVT MulVT = MulOp.getValueType();
7223 if (MulOp->getOpcode() == ISD::MUL &&
7224 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
7225 // Find a zero extended value and its type.
7226 SDValue LHS = MulOp->getOperand(0);
7227 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
7228 WordVT = LHS->getOperand(0).getValueType();
7229 else if (LHS->getOpcode() == ISD::AssertZext)
7230 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
7231 else
7232 return;
7233 // Find a replicating constant, e.g. 0x00010001.
7234 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
7235 SystemZVectorConstantInfo VCI(
7236 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
7237 if (VCI.isVectorConstantLegal(Subtarget) &&
7238 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
7239 WordVT == VCI.VecVT.getScalarType())
7240 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
7241 }
7242 }
7243 };
7244
7245 if (isa<BuildVectorSDNode>(Op1) &&
7246 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
7247 SDValue SplatVal = Op1->getOperand(0);
7248 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
7249 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
7250 else
7251 FindReplicatedReg(SplatVal);
7252 } else {
7253 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
7254 FindReplicatedImm(C, MemVT.getStoreSize());
7255 else
7256 FindReplicatedReg(Op1);
7257 }
7258
7259 if (Word != SDValue()) {
7260 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
7261 "Bad type handling");
7262 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
7263 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
7264 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
7265 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
7266 SN->getBasePtr(), SN->getMemOperand());
7267 }
7268 }
7269
7270 return SDValue();
7271}
7272
7273SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
7274 SDNode *N, DAGCombinerInfo &DCI) const {
7275 SelectionDAG &DAG = DCI.DAG;
7276 // Combine element-swap (LOAD) into VLER
7277 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7278 N->getOperand(0).hasOneUse() &&
7279 Subtarget.hasVectorEnhancements2()) {
7280 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
7281 ArrayRef<int> ShuffleMask = SVN->getMask();
7282 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
7283 SDValue Load = N->getOperand(0);
7284 LoadSDNode *LD = cast<LoadSDNode>(Load);
7285
7286 // Create the element-swapping load.
7287 SDValue Ops[] = {
7288 LD->getChain(), // Chain
7289 LD->getBasePtr() // Ptr
7290 };
7291 SDValue ESLoad =
7292 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
7293 DAG.getVTList(LD->getValueType(0), MVT::Other),
7294 Ops, LD->getMemoryVT(), LD->getMemOperand());
7295
7296 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
7297 // by the load dead.
7298 DCI.CombineTo(N, ESLoad);
7299
7300 // Next, combine the load away, we give it a bogus result value but a real
7301 // chain result. The result value is dead because the shuffle is dead.
7302 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
7303
7304 // Return N so it doesn't get rechecked!
7305 return SDValue(N, 0);
7306 }
7307 }
7308
7309 return SDValue();
7310}
7311
7312SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
7313 SDNode *N, DAGCombinerInfo &DCI) const {
7314 SelectionDAG &DAG = DCI.DAG;
7315
7316 if (!Subtarget.hasVector())
7317 return SDValue();
7318
7319 // Look through bitcasts that retain the number of vector elements.
7320 SDValue Op = N->getOperand(0);
7321 if (Op.getOpcode() == ISD::BITCAST &&
7322 Op.getValueType().isVector() &&
7323 Op.getOperand(0).getValueType().isVector() &&
7324 Op.getValueType().getVectorNumElements() ==
7325 Op.getOperand(0).getValueType().getVectorNumElements())
7326 Op = Op.getOperand(0);
7327
7328 // Pull BSWAP out of a vector extraction.
7329 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
7330 EVT VecVT = Op.getValueType();
7331 EVT EltVT = VecVT.getVectorElementType();
7332 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
7333 Op.getOperand(0), N->getOperand(1));
7334 DCI.AddToWorklist(Op.getNode());
7335 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
7336 if (EltVT != N->getValueType(0)) {
7337 DCI.AddToWorklist(Op.getNode());
7338 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
7339 }
7340 return Op;
7341 }
7342
7343 // Try to simplify a vector extraction.
7344 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
7345 SDValue Op0 = N->getOperand(0);
7346 EVT VecVT = Op0.getValueType();
7347 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
7348 IndexN->getZExtValue(), DCI, false);
7349 }
7350 return SDValue();
7351}
7352
7353SDValue SystemZTargetLowering::combineJOIN_DWORDS(
7354 SDNode *N, DAGCombinerInfo &DCI) const {
7355 SelectionDAG &DAG = DCI.DAG;
7356 // (join_dwords X, X) == (replicate X)
7357 if (N->getOperand(0) == N->getOperand(1))
7358 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
7359 N->getOperand(0));
7360 return SDValue();
7361}
7362
7363 static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
7364 SDValue Chain1 = N1->getOperand(0);
7365 SDValue Chain2 = N2->getOperand(0);
7366
7367 // Trivial case: both nodes take the same chain.
7368 if (Chain1 == Chain2)
7369 return Chain1;
7370
7371 // FIXME - we could handle more complex cases via TokenFactor,
7372 // assuming we can verify that this would not create a cycle.
7373 return SDValue();
7374}
7375
7376SDValue SystemZTargetLowering::combineFP_ROUND(
7377 SDNode *N, DAGCombinerInfo &DCI) const {
7378
7379 if (!Subtarget.hasVector())
7380 return SDValue();
7381
7382 // (fpround (extract_vector_elt X 0))
7383 // (fpround (extract_vector_elt X 1)) ->
7384 // (extract_vector_elt (VROUND X) 0)
7385 // (extract_vector_elt (VROUND X) 2)
7386 //
7387 // This is a special case since the target doesn't really support v2f32s.
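// Note that VROUND on a v2f64 places its two f32 results in elements 0 and 2
// of the v4f32 result, which is why the second value is re-extracted from
// index 2 below.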
7388 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7389 SelectionDAG &DAG = DCI.DAG;
7390 SDValue Op0 = N->getOperand(OpNo);
7391 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
7392 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7393 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
7394 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7395 Op0.getConstantOperandVal(1) == 0) {
7396 SDValue Vec = Op0.getOperand(0);
7397 for (auto *U : Vec->uses()) {
7398 if (U != Op0.getNode() && U->hasOneUse() &&
7399 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7400 U->getOperand(0) == Vec &&
7401 U->getOperand(1).getOpcode() == ISD::Constant &&
7402 U->getConstantOperandVal(1) == 1) {
7403 SDValue OtherRound = SDValue(*U->use_begin(), 0);
7404 if (OtherRound.getOpcode() == N->getOpcode() &&
7405 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
7406 OtherRound.getValueType() == MVT::f32) {
7407 SDValue VRound, Chain;
7408 if (N->isStrictFPOpcode()) {
7409 Chain = MergeInputChains(N, OtherRound.getNode());
7410 if (!Chain)
7411 continue;
7412 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
7413 {MVT::v4f32, MVT::Other}, {Chain, Vec});
7414 Chain = VRound.getValue(1);
7415 } else
7416 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
7417 MVT::v4f32, Vec);
7418 DCI.AddToWorklist(VRound.getNode());
7419 SDValue Extract1 =
7420 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
7421 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
7422 DCI.AddToWorklist(Extract1.getNode());
7423 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
7424 if (Chain)
7425 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
7426 SDValue Extract0 =
7427 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
7428 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7429 if (Chain)
7430 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7431 N->getVTList(), Extract0, Chain);
7432 return Extract0;
7433 }
7434 }
7435 }
7436 }
7437 return SDValue();
7438}
7439
7440SDValue SystemZTargetLowering::combineFP_EXTEND(
7441 SDNode *N, DAGCombinerInfo &DCI) const {
7442
7443 if (!Subtarget.hasVector())
7444 return SDValue();
7445
7446 // (fpextend (extract_vector_elt X 0))
7447 // (fpextend (extract_vector_elt X 2)) ->
7448 // (extract_vector_elt (VEXTEND X) 0)
7449 // (extract_vector_elt (VEXTEND X) 1)
7450 //
7451 // This is a special case since the target doesn't really support v2f32s.
7452 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7453 SelectionDAG &DAG = DCI.DAG;
7454 SDValue Op0 = N->getOperand(OpNo);
7455 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
7456 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7457 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
7458 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7459 Op0.getConstantOperandVal(1) == 0) {
7460 SDValue Vec = Op0.getOperand(0);
7461 for (auto *U : Vec->uses()) {
7462 if (U != Op0.getNode() && U->hasOneUse() &&
7463 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7464 U->getOperand(0) == Vec &&
7465 U->getOperand(1).getOpcode() == ISD::Constant &&
7466 U->getConstantOperandVal(1) == 2) {
7467 SDValue OtherExtend = SDValue(*U->use_begin(), 0);
7468 if (OtherExtend.getOpcode() == N->getOpcode() &&
7469 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
7470 OtherExtend.getValueType() == MVT::f64) {
7471 SDValue VExtend, Chain;
7472 if (N->isStrictFPOpcode()) {
7473 Chain = MergeInputChains(N, OtherExtend.getNode());
7474 if (!Chain)
7475 continue;
7476 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
7477 {MVT::v2f64, MVT::Other}, {Chain, Vec});
7478 Chain = VExtend.getValue(1);
7479 } else
7480 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
7481 MVT::v2f64, Vec);
7482 DCI.AddToWorklist(VExtend.getNode());
7483 SDValue Extract1 =
7484 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
7485 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
7486 DCI.AddToWorklist(Extract1.getNode());
7487 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
7488 if (Chain)
7489 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
7490 SDValue Extract0 =
7491 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
7492 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7493 if (Chain)
7494 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7495 N->getVTList(), Extract0, Chain);
7496 return Extract0;
7497 }
7498 }
7499 }
7500 }
7501 return SDValue();
7502}
7503
7504SDValue SystemZTargetLowering::combineINT_TO_FP(
7505 SDNode *N, DAGCombinerInfo &DCI) const {
7506 if (DCI.Level != BeforeLegalizeTypes)
7507 return SDValue();
7508 SelectionDAG &DAG = DCI.DAG;
7509 LLVMContext &Ctx = *DAG.getContext();
7510 unsigned Opcode = N->getOpcode();
7511 EVT OutVT = N->getValueType(0);
7512 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
7513 SDValue Op = N->getOperand(0);
7514 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
7515 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
7516
7517 // Insert an extension before type-legalization to avoid scalarization, e.g.:
7518 // v2f64 = uint_to_fp v2i16
7519 // =>
7520 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
7521 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
7522 OutScalarBits <= 64) {
7523 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
7524 EVT ExtVT = EVT::getVectorVT(
7525 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
7526 unsigned ExtOpcode =
7527 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
7528 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
7529 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
7530 }
7531 return SDValue();
7532}
7533
7534SDValue SystemZTargetLowering::combineBSWAP(
7535 SDNode *N, DAGCombinerInfo &DCI) const {
7536 SelectionDAG &DAG = DCI.DAG;
7537 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
7538 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7539 N->getOperand(0).hasOneUse() &&
7540 canLoadStoreByteSwapped(N->getValueType(0))) {
7541 SDValue Load = N->getOperand(0);
7542 LoadSDNode *LD = cast<LoadSDNode>(Load);
7543
7544 // Create the byte-swapping load.
7545 SDValue Ops[] = {
7546 LD->getChain(), // Chain
7547 LD->getBasePtr() // Ptr
7548 };
7549 EVT LoadVT = N->getValueType(0);
7550 if (LoadVT == MVT::i16)
7551 LoadVT = MVT::i32;
7552 SDValue BSLoad =
7553 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
7554 DAG.getVTList(LoadVT, MVT::Other),
7555 Ops, LD->getMemoryVT(), LD->getMemOperand());
7556
7557 // If this is an i16 load, insert the truncate.
7558 SDValue ResVal = BSLoad;
7559 if (N->getValueType(0) == MVT::i16)
7560 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
7561
7562 // First, combine the bswap away. This makes the value produced by the
7563 // load dead.
7564 DCI.CombineTo(N, ResVal);
7565
7566 // Next, combine the load away, we give it a bogus result value but a real
7567 // chain result. The result value is dead because the bswap is dead.
7568 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
7569
7570 // Return N so it doesn't get rechecked!
7571 return SDValue(N, 0);
7572 }
7573
7574 // Look through bitcasts that retain the number of vector elements.
7575 SDValue Op = N->getOperand(0);
7576 if (Op.getOpcode() == ISD::BITCAST &&
7577 Op.getValueType().isVector() &&
7578 Op.getOperand(0).getValueType().isVector() &&
7579 Op.getValueType().getVectorNumElements() ==
7580 Op.getOperand(0).getValueType().getVectorNumElements())
7581 Op = Op.getOperand(0);
7582
7583 // Push BSWAP into a vector insertion if at least one side then simplifies.
7584 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
7585 SDValue Vec = Op.getOperand(0);
7586 SDValue Elt = Op.getOperand(1);
7587 SDValue Idx = Op.getOperand(2);
7588
7589 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
7590 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
7591 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
7592 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
7593 (canLoadStoreByteSwapped(N->getValueType(0)) &&
7594 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
7595 EVT VecVT = N->getValueType(0);
7596 EVT EltVT = N->getValueType(0).getVectorElementType();
7597 if (VecVT != Vec.getValueType()) {
7598 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
7599 DCI.AddToWorklist(Vec.getNode());
7600 }
7601 if (EltVT != Elt.getValueType()) {
7602 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
7603 DCI.AddToWorklist(Elt.getNode());
7604 }
7605 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
7606 DCI.AddToWorklist(Vec.getNode());
7607 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
7608 DCI.AddToWorklist(Elt.getNode());
7609 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
7610 Vec, Elt, Idx);
7611 }
7612 }
7613
7614 // Push BSWAP into a vector shuffle if at least one side then simplifies.
7615 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
7616 if (SV && Op.hasOneUse()) {
7617 SDValue Op0 = Op.getOperand(0);
7618 SDValue Op1 = Op.getOperand(1);
7619
7620 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
7621 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
7622 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
7623 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
7624 EVT VecVT = N->getValueType(0);
7625 if (VecVT != Op0.getValueType()) {
7626 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
7627 DCI.AddToWorklist(Op0.getNode());
7628 }
7629 if (VecVT != Op1.getValueType()) {
7630 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
7631 DCI.AddToWorklist(Op1.getNode());
7632 }
7633 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
7634 DCI.AddToWorklist(Op0.getNode());
7635 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
7636 DCI.AddToWorklist(Op1.getNode());
7637 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
7638 }
7639 }
7640
7641 return SDValue();
7642}
7643
7644static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
7645 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
7646 // set by the CCReg instruction using the CCValid / CCMask masks.
7647 // If the CCReg instruction is itself an ICMP testing the condition
7648 // code set by some other instruction, see whether we can directly
7649 // use that condition code.
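// For example, if CCReg comes from (icmp (select_ccmask 1, 0, Valid, Mask),
// 0, NE), that comparison is true exactly when Mask selected the value 1, so
// the branch or select can test Mask directly on the CC value that fed the
// SELECT_CCMASK.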
7650
7651 // Verify that we have an ICMP against some constant.
7652 if (CCValid != SystemZ::CCMASK_ICMP)
7653 return false;
7654 auto *ICmp = CCReg.getNode();
7655 if (ICmp->getOpcode() != SystemZISD::ICMP)
7656 return false;
7657 auto *CompareLHS = ICmp->getOperand(0).getNode();
7658 auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
7659 if (!CompareRHS)
7660 return false;
7661
7662 // Optimize the case where CompareLHS is a SELECT_CCMASK.
7663 if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
7664 // Verify that we have an appropriate mask for an EQ or NE comparison.
7665 bool Invert = false;
7666 if (CCMask == SystemZ::CCMASK_CMP_NE)
7667 Invert = !Invert;
7668 else if (CCMask != SystemZ::CCMASK_CMP_EQ)
7669 return false;
7670
7671 // Verify that the ICMP compares against one of the select values.
7672 auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
7673 if (!TrueVal)
7674 return false;
7675 auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7676 if (!FalseVal)
7677 return false;
7678 if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
7679 Invert = !Invert;
7680 else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
7681 return false;
7682
7683 // Compute the effective CC mask for the new branch or select.
7684 auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
7685 auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
7686 if (!NewCCValid || !NewCCMask)
7687 return false;
7688 CCValid = NewCCValid->getZExtValue();
7689 CCMask = NewCCMask->getZExtValue();
7690 if (Invert)
7691 CCMask ^= CCValid;
7692
7693 // Return the updated CCReg link.
7694 CCReg = CompareLHS->getOperand(4);
7695 return true;
7696 }
7697
7698 // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
7699 if (CompareLHS->getOpcode() == ISD::SRA) {
7700 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7701 if (!SRACount || SRACount->getZExtValue() != 30)
7702 return false;
7703 auto *SHL = CompareLHS->getOperand(0).getNode();
7704 if (SHL->getOpcode() != ISD::SHL)
7705 return false;
7706 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
7707 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
7708 return false;
7709 auto *IPM = SHL->getOperand(0).getNode();
7710 if (IPM->getOpcode() != SystemZISD::IPM)
7711 return false;
7712
7713 // Avoid introducing CC spills (because SRA would clobber CC).
7714 if (!CompareLHS->hasOneUse())
7715 return false;
7716 // Verify that the ICMP compares against zero.
7717 if (CompareRHS->getZExtValue() != 0)
7718 return false;
7719
7720 // Compute the effective CC mask for the new branch or select.
7721 CCMask = SystemZ::reverseCCMask(CCMask);
7722
7723 // Return the updated CCReg link.
7724 CCReg = IPM->getOperand(0);
7725 return true;
7726 }
7727
7728 return false;
7729}
7730
7731SDValue SystemZTargetLowering::combineBR_CCMASK(
7732 SDNode *N, DAGCombinerInfo &DCI) const {
7733 SelectionDAG &DAG = DCI.DAG;
7734
7735 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
7736 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7737 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7738 if (!CCValid || !CCMask)
7739 return SDValue();
7740
7741 int CCValidVal = CCValid->getZExtValue();
7742 int CCMaskVal = CCMask->getZExtValue();
7743 SDValue Chain = N->getOperand(0);
7744 SDValue CCReg = N->getOperand(4);
7745
7746 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7747 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
7748 Chain,
7749 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7750 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7751 N->getOperand(3), CCReg);
7752 return SDValue();
7753}
7754
7755SDValue SystemZTargetLowering::combineSELECT_CCMASK(
7756 SDNode *N, DAGCombinerInfo &DCI) const {
7757 SelectionDAG &DAG = DCI.DAG;
7758
7759 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
7760 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
7761 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
7762 if (!CCValid || !CCMask)
7763 return SDValue();
7764
7765 int CCValidVal = CCValid->getZExtValue();
7766 int CCMaskVal = CCMask->getZExtValue();
7767 SDValue CCReg = N->getOperand(4);
7768
7769 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7770 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
7771 N->getOperand(0), N->getOperand(1),
7772 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7773 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7774 CCReg);
7775 return SDValue();
7776}
7777
7778
7779SDValue SystemZTargetLowering::combineGET_CCMASK(
7780 SDNode *N, DAGCombinerInfo &DCI) const {
7781
7782 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
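// GET_CCMASK asks for the CC value behind a boolean. If the boolean is a
// SELECT_CCMASK producing 1/0 (or 0/1, with the mask inverted) and its
// CCValid/CCMask are compatible with the ones requested here, the original
// CC operand can be returned directly.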
7783 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7784 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7785 if (!CCValid || !CCMask)
7786 return SDValue();
7787 int CCValidVal = CCValid->getZExtValue();
7788 int CCMaskVal = CCMask->getZExtValue();
7789
7790 SDValue Select = N->getOperand(0);
7791 if (Select->getOpcode() == ISD::TRUNCATE)
7792 Select = Select->getOperand(0);
7793 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
7794 return SDValue();
7795
7796 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
7797 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
7798 if (!SelectCCValid || !SelectCCMask)
7799 return SDValue();
7800 int SelectCCValidVal = SelectCCValid->getZExtValue();
7801 int SelectCCMaskVal = SelectCCMask->getZExtValue();
7802
7803 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
7804 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
7805 if (!TrueVal || !FalseVal)
7806 return SDValue();
7807 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
7808 ;
7809 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
7810 SelectCCMaskVal ^= SelectCCValidVal;
7811 else
7812 return SDValue();
7813
7814 if (SelectCCValidVal & ~CCValidVal)
7815 return SDValue();
7816 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
7817 return SDValue();
7818
7819 return Select->getOperand(4);
7820}
7821
7822SDValue SystemZTargetLowering::combineIntDIVREM(
7823 SDNode *N, DAGCombinerInfo &DCI) const {
7824 SelectionDAG &DAG = DCI.DAG;
7825 EVT VT = N->getValueType(0);
7826 // In the case where the divisor is a vector of constants a cheaper
7827 // sequence of instructions can replace the divide. BuildSDIV is called to
7828 // do this during DAG combining, but it only succeeds when it can build a
7829 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
7830 // since it is not Legal but Custom it can only happen before
7831 // legalization. Therefore we must scalarize this early before Combine
7832 // 1. For widened vectors, this is already the result of type legalization.
7833 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
7834 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
7835 return DAG.UnrollVectorOp(N);
7836 return SDValue();
7837}
7838
7839SDValue SystemZTargetLowering::combineINTRINSIC(
7840 SDNode *N, DAGCombinerInfo &DCI) const {
7841 SelectionDAG &DAG = DCI.DAG;
7842
7843 unsigned Id = N->getConstantOperandVal(1);
7844 switch (Id) {
7845 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
7846 // or larger is simply a vector load.
7847 case Intrinsic::s390_vll:
7848 case Intrinsic::s390_vlrl:
7849 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
7850 if (C->getZExtValue() >= 15)
7851 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
7852 N->getOperand(3), MachinePointerInfo());
7853 break;
7854 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
7855 case Intrinsic::s390_vstl:
7856 case Intrinsic::s390_vstrl:
7857 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
7858 if (C->getZExtValue() >= 15)
7859 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
7860 N->getOperand(4), MachinePointerInfo());
7861 break;
7862 }
7863
7864 return SDValue();
7865}
7866
7867SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
7868 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
7869 return N->getOperand(0);
7870 return N;
7871}
7872
7873 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
7874 DAGCombinerInfo &DCI) const {
7875 switch(N->getOpcode()) {
7876 default: break;
7877 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
7878 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
7879 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
7880 case SystemZISD::MERGE_HIGH:
7881 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
7882 case ISD::LOAD: return combineLOAD(N, DCI);
7883 case ISD::STORE: return combineSTORE(N, DCI);
7884 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
7885 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
7886 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
7887 case ISD::STRICT_FP_ROUND:
7888 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
7889 case ISD::STRICT_FP_EXTEND:
7890 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
7891 case ISD::SINT_TO_FP:
7892 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
7893 case ISD::BSWAP: return combineBSWAP(N, DCI);
7894 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
7895 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
7896 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
7897 case ISD::SDIV:
7898 case ISD::UDIV:
7899 case ISD::SREM:
7900 case ISD::UREM: return combineIntDIVREM(N, DCI);
7901 case ISD::INTRINSIC_W_CHAIN:
7902 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
7903 }
7904
7905 return SDValue();
7906}
7907
7908// Return the demanded elements for the OpNo source operand of Op. DemandedElts
7909// are for Op.
7910static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
7911 unsigned OpNo) {
7912 EVT VT = Op.getValueType();
7913 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
7914 APInt SrcDemE;
7915 unsigned Opcode = Op.getOpcode();
7916 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7917 unsigned Id = Op.getConstantOperandVal(0);
7918 switch (Id) {
7919 case Intrinsic::s390_vpksh: // PACKS
7920 case Intrinsic::s390_vpksf:
7921 case Intrinsic::s390_vpksg:
7922 case Intrinsic::s390_vpkshs: // PACKS_CC
7923 case Intrinsic::s390_vpksfs:
7924 case Intrinsic::s390_vpksgs:
7925 case Intrinsic::s390_vpklsh: // PACKLS
7926 case Intrinsic::s390_vpklsf:
7927 case Intrinsic::s390_vpklsg:
7928 case Intrinsic::s390_vpklshs: // PACKLS_CC
7929 case Intrinsic::s390_vpklsfs:
7930 case Intrinsic::s390_vpklsgs:
7931 // VECTOR PACK truncates the elements of two source vectors into one.
7932 SrcDemE = DemandedElts;
7933 if (OpNo == 2)
7934 SrcDemE.lshrInPlace(NumElts / 2);
7935 SrcDemE = SrcDemE.trunc(NumElts / 2);
7936 break;
7937 // VECTOR UNPACK extends half the elements of the source vector.
7938 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7939 case Intrinsic::s390_vuphh:
7940 case Intrinsic::s390_vuphf:
7941 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7942 case Intrinsic::s390_vuplhh:
7943 case Intrinsic::s390_vuplhf:
7944 SrcDemE = APInt(NumElts * 2, 0);
7945 SrcDemE.insertBits(DemandedElts, 0);
7946 break;
7947 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7948 case Intrinsic::s390_vuplhw:
7949 case Intrinsic::s390_vuplf:
7950 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7951 case Intrinsic::s390_vupllh:
7952 case Intrinsic::s390_vupllf:
7953 SrcDemE = APInt(NumElts * 2, 0);
7954 SrcDemE.insertBits(DemandedElts, NumElts);
7955 break;
7956 case Intrinsic::s390_vpdi: {
7957 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
7958 SrcDemE = APInt(NumElts, 0);
7959 if (!DemandedElts[OpNo - 1])
7960 break;
7961 unsigned Mask = Op.getConstantOperandVal(3);
7962 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
7963 // Demand input element 0 or 1, given by the mask bit value.
7964 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
7965 break;
7966 }
7967 case Intrinsic::s390_vsldb: {
7968 // VECTOR SHIFT LEFT DOUBLE BY BYTE
7969 assert(VT == MVT::v16i8 && "Unexpected type.");
7970 unsigned FirstIdx = Op.getConstantOperandVal(3);
7971 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
7972 unsigned NumSrc0Els = 16 - FirstIdx;
7973 SrcDemE = APInt(NumElts, 0);
7974 if (OpNo == 1) {
7975 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
7976 SrcDemE.insertBits(DemEls, FirstIdx);
7977 } else {
7978 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
7979 SrcDemE.insertBits(DemEls, 0);
7980 }
7981 break;
7982 }
7983 case Intrinsic::s390_vperm:
7984 SrcDemE = APInt(NumElts, -1);
7985 break;
7986 default:
7987 llvm_unreachable("Unhandled intrinsic.");
7988 break;
7989 }
7990 } else {
7991 switch (Opcode) {
7992 case SystemZISD::JOIN_DWORDS:
7993 // Scalar operand.
7994 SrcDemE = APInt(1, 1);
7995 break;
7996 case SystemZISD::SELECT_CCMASK:
7997 SrcDemE = DemandedElts;
7998 break;
7999 default:
8000 llvm_unreachable("Unhandled opcode.");
8001 break;
8002 }
8003 }
8004 return SrcDemE;
8005}
8006
8007static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
8008 const APInt &DemandedElts,
8009 const SelectionDAG &DAG, unsigned Depth,
8010 unsigned OpNo) {
8011 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
8012 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
8013 KnownBits LHSKnown =
8014 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
8015 KnownBits RHSKnown =
8016 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
8017 Known = LHSKnown.intersectWith(RHSKnown);
8018}
8019
8020void
8021 SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
8022 KnownBits &Known,
8023 const APInt &DemandedElts,
8024 const SelectionDAG &DAG,
8025 unsigned Depth) const {
8026 Known.resetAll();
8027
8028 // Intrinsic CC result is returned in the two low bits.
8029 unsigned tmp0, tmp1; // not used
8030 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
8031 Known.Zero.setBitsFrom(2);
8032 return;
8033 }
8034 EVT VT = Op.getValueType();
8035 if (Op.getResNo() != 0 || VT == MVT::Untyped)
8036 return;
8037 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
8038 "KnownBits does not match VT in bitwidth");
8039 assert ((!VT.isVector() ||
8040 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
8041 "DemandedElts does not match VT number of elements");
8042 unsigned BitWidth = Known.getBitWidth();
8043 unsigned Opcode = Op.getOpcode();
8044 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
8045 bool IsLogical = false;
8046 unsigned Id = Op.getConstantOperandVal(0);
8047 switch (Id) {
8048 case Intrinsic::s390_vpksh: // PACKS
8049 case Intrinsic::s390_vpksf:
8050 case Intrinsic::s390_vpksg:
8051 case Intrinsic::s390_vpkshs: // PACKS_CC
8052 case Intrinsic::s390_vpksfs:
8053 case Intrinsic::s390_vpksgs:
8054 case Intrinsic::s390_vpklsh: // PACKLS
8055 case Intrinsic::s390_vpklsf:
8056 case Intrinsic::s390_vpklsg:
8057 case Intrinsic::s390_vpklshs: // PACKLS_CC
8058 case Intrinsic::s390_vpklsfs:
8059 case Intrinsic::s390_vpklsgs:
8060 case Intrinsic::s390_vpdi:
8061 case Intrinsic::s390_vsldb:
8062 case Intrinsic::s390_vperm:
8063 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
8064 break;
8065 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
8066 case Intrinsic::s390_vuplhh:
8067 case Intrinsic::s390_vuplhf:
8068 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
8069 case Intrinsic::s390_vupllh:
8070 case Intrinsic::s390_vupllf:
8071 IsLogical = true;
8072 [[fallthrough]];
8073 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8074 case Intrinsic::s390_vuphh:
8075 case Intrinsic::s390_vuphf:
8076 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8077 case Intrinsic::s390_vuplhw:
8078 case Intrinsic::s390_vuplf: {
8079 SDValue SrcOp = Op.getOperand(1);
8080 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
8081 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
8082 if (IsLogical) {
8083 Known = Known.zext(BitWidth);
8084 } else
8085 Known = Known.sext(BitWidth);
8086 break;
8087 }
8088 default:
8089 break;
8090 }
8091 } else {
8092 switch (Opcode) {
8093 case SystemZISD::JOIN_DWORDS:
8094 case SystemZISD::SELECT_CCMASK:
8095 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
8096 break;
8097 case SystemZISD::REPLICATE: {
8098 SDValue SrcOp = Op.getOperand(0);
8099 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
8100 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
8101 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
8102 break;
8103 }
8104 default:
8105 break;
8106 }
8107 }
8108
8109 // Known has the width of the source operand(s). Adjust if needed to match
8110 // the passed bitwidth.
8111 if (Known.getBitWidth() != BitWidth)
8112 Known = Known.anyextOrTrunc(BitWidth);
8113}
8114
8115static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
8116 const SelectionDAG &DAG, unsigned Depth,
8117 unsigned OpNo) {
8118 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
8119 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
8120 if (LHS == 1) return 1; // Early out.
8121 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
8122 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
8123 if (RHS == 1) return 1; // Early out.
8124 unsigned Common = std::min(LHS, RHS);
8125 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
8126 EVT VT = Op.getValueType();
8127 unsigned VTBits = VT.getScalarSizeInBits();
8128 if (SrcBitWidth > VTBits) { // PACK
8129 unsigned SrcExtraBits = SrcBitWidth - VTBits;
8130 if (Common > SrcExtraBits)
8131 return (Common - SrcExtraBits);
8132 return 1;
8133 }
8134 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
8135 return Common;
8136}
8137
8138unsigned
8139 SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
8140 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
8141 unsigned Depth) const {
8142 if (Op.getResNo() != 0)
8143 return 1;
8144 unsigned Opcode = Op.getOpcode();
8145 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
8146 unsigned Id = Op.getConstantOperandVal(0);
8147 switch (Id) {
8148 case Intrinsic::s390_vpksh: // PACKS
8149 case Intrinsic::s390_vpksf:
8150 case Intrinsic::s390_vpksg:
8151 case Intrinsic::s390_vpkshs: // PACKS_CC
8152 case Intrinsic::s390_vpksfs:
8153 case Intrinsic::s390_vpksgs:
8154 case Intrinsic::s390_vpklsh: // PACKLS
8155 case Intrinsic::s390_vpklsf:
8156 case Intrinsic::s390_vpklsg:
8157 case Intrinsic::s390_vpklshs: // PACKLS_CC
8158 case Intrinsic::s390_vpklsfs:
8159 case Intrinsic::s390_vpklsgs:
8160 case Intrinsic::s390_vpdi:
8161 case Intrinsic::s390_vsldb:
8162 case Intrinsic::s390_vperm:
8163 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
8164 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8165 case Intrinsic::s390_vuphh:
8166 case Intrinsic::s390_vuphf:
8167 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8168 case Intrinsic::s390_vuplhw:
8169 case Intrinsic::s390_vuplf: {
8170 SDValue PackedOp = Op.getOperand(1);
8171 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
8172 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
8173 EVT VT = Op.getValueType();
8174 unsigned VTBits = VT.getScalarSizeInBits();
8175 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
8176 return Tmp;
8177 }
8178 default:
8179 break;
8180 }
8181 } else {
8182 switch (Opcode) {
8183 case SystemZISD::SELECT_CCMASK:
8184 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
8185 default:
8186 break;
8187 }
8188 }
8189
8190 return 1;
8191}
8192
8193 bool SystemZTargetLowering::
8194 isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op,
8195 const APInt &DemandedElts, const SelectionDAG &DAG,
8196 bool PoisonOnly, unsigned Depth) const {
8197 switch (Op->getOpcode()) {
8198 case SystemZISD::PCREL_WRAPPER:
8199 case SystemZISD::PCREL_OFFSET:
8200 return true;
8201 }
8202 return false;
8203}
8204
8205unsigned
8206 SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
8207 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
8208 unsigned StackAlign = TFI->getStackAlignment();
8209 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
8210 "Unexpected stack alignment");
8211 // The default stack probe size is 4096 if the function has no
8212 // stack-probe-size attribute.
8213 unsigned StackProbeSize =
8214 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
8215 // Round down to the stack alignment.
8216 StackProbeSize &= ~(StackAlign - 1);
8217 return StackProbeSize ? StackProbeSize : StackAlign;
8218}
8219
8220//===----------------------------------------------------------------------===//
8221// Custom insertion
8222//===----------------------------------------------------------------------===//
8223
8224// Force base value Base into a register before MI. Return the register.
8225 static Register forceReg(MachineInstr &MI, MachineOperand &Base,
8226 const SystemZInstrInfo *TII) {
8227 MachineBasicBlock *MBB = MI.getParent();
8228 MachineFunction &MF = *MBB->getParent();
8229 MachineRegisterInfo &MRI = MF.getRegInfo();
8230
8231 if (Base.isReg()) {
8232 // Copy Base into a new virtual register to help register coalescing in
8233 // cases with multiple uses.
8234 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8235 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
8236 .add(Base);
8237 return Reg;
8238 }
8239
8240 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8241 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
8242 .add(Base)
8243 .addImm(0)
8244 .addReg(0);
8245 return Reg;
8246}
8247
8248// The CC operand of MI might be missing a kill marker because there
8249// were multiple uses of CC, and ISel didn't know which to mark.
8250// Figure out whether MI should have had a kill marker.
8251 static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
8252 // Scan forward through BB for a use/def of CC.
8253 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
8254 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
8255 const MachineInstr& mi = *miI;
8256 if (mi.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
8257 return false;
8258 if (mi.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
8259 break; // Should have kill-flag - update below.
8260 }
8261
8262 // If we hit the end of the block, check whether CC is live into a
8263 // successor.
8264 if (miI == MBB->end()) {
8265 for (const MachineBasicBlock *Succ : MBB->successors())
8266 if (Succ->isLiveIn(SystemZ::CC))
8267 return false;
8268 }
8269
8270 return true;
8271}
8272
8273// Return true if it is OK for this Select pseudo-opcode to be cascaded
8274// together with other Select pseudo-opcodes into a single basic-block with
8275// a conditional jump around it.
8276 static bool isSelectPseudo(MachineInstr &MI) {
8277 switch (MI.getOpcode()) {
8278 case SystemZ::Select32:
8279 case SystemZ::Select64:
8280 case SystemZ::Select128:
8281 case SystemZ::SelectF32:
8282 case SystemZ::SelectF64:
8283 case SystemZ::SelectF128:
8284 case SystemZ::SelectVR32:
8285 case SystemZ::SelectVR64:
8286 case SystemZ::SelectVR128:
8287 return true;
8288
8289 default:
8290 return false;
8291 }
8292}
8293
8294// Helper function, which inserts PHI functions into SinkMBB:
8295// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
8296// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
8297 static void createPHIsForSelects(SmallVectorImpl<MachineInstr*> &Selects,
8298 MachineBasicBlock *TrueMBB,
8299 MachineBasicBlock *FalseMBB,
8300 MachineBasicBlock *SinkMBB) {
8301 MachineFunction *MF = TrueMBB->getParent();
8302 const SystemZInstrInfo *TII = MF->getSubtarget<SystemZSubtarget>().getInstrInfo();
8303
8304 MachineInstr *FirstMI = Selects.front();
8305 unsigned CCValid = FirstMI->getOperand(3).getImm();
8306 unsigned CCMask = FirstMI->getOperand(4).getImm();
8307
8308 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
8309
8310 // As we are creating the PHIs, we have to be careful if there is more than
8311 // one. Later Selects may reference the results of earlier Selects, but later
8312 // PHIs have to reference the individual true/false inputs from earlier PHIs.
8313 // That also means that PHI construction must work forward from earlier to
8314 // later, and that the code must maintain a mapping from earlier PHI's
8315 // destination registers, and the registers that went into the PHI.
8316 DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
8317
8318 for (auto *MI : Selects) {
8319 Register DestReg = MI->getOperand(0).getReg();
8320 Register TrueReg = MI->getOperand(1).getReg();
8321 Register FalseReg = MI->getOperand(2).getReg();
8322
8323 // If this Select we are generating is the opposite condition from
8324 // the jump we generated, then we have to swap the operands for the
8325 // PHI that is going to be generated.
8326 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
8327 std::swap(TrueReg, FalseReg);
8328
8329 if (RegRewriteTable.contains(TrueReg))
8330 TrueReg = RegRewriteTable[TrueReg].first;
8331
8332 if (RegRewriteTable.contains(FalseReg))
8333 FalseReg = RegRewriteTable[FalseReg].second;
8334
8335 DebugLoc DL = MI->getDebugLoc();
8336 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
8337 .addReg(TrueReg).addMBB(TrueMBB)
8338 .addReg(FalseReg).addMBB(FalseMBB);
8339
8340 // Add this PHI to the rewrite table.
8341 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
8342 }
8343
8344 MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
8345}
8346
8347 MachineBasicBlock *
8348 SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
8349 MachineBasicBlock *BB) const {
8350 MachineFunction &MF = *BB->getParent();
8351 MachineFrameInfo &MFI = MF.getFrameInfo();
8352 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
8353 assert(TFL->hasReservedCallFrame(MF) &&
8354 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
8355 (void)TFL;
8356 // Get the MaxCallFrameSize value and erase MI since it serves no further
8357 // purpose as the call frame is statically reserved in the prolog. Set
8358 // AdjustsStack as MI is *not* mapped as a frame instruction.
8359 uint32_t NumBytes = MI.getOperand(0).getImm();
8360 if (NumBytes > MFI.getMaxCallFrameSize())
8361 MFI.setMaxCallFrameSize(NumBytes);
8362 MFI.setAdjustsStack(true);
8363
8364 MI.eraseFromParent();
8365 return BB;
8366}
8367
8368// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
8370SystemZTargetLowering::emitSelect(MachineInstr &MI,
8371 MachineBasicBlock *MBB) const {
8372 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
8373 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8374
8375 unsigned CCValid = MI.getOperand(3).getImm();
8376 unsigned CCMask = MI.getOperand(4).getImm();
8377
8378 // If we have a sequence of Select* pseudo instructions using the
8379 // same condition code value, we want to expand all of them into
8380 // a single pair of basic blocks using the same condition.
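// For example, after an integer comparison (CCValid == SystemZ::CCMASK_ICMP) a
// Select using CCMask == SystemZ::CCMASK_CMP_EQ can be grouped with a later
// Select whose mask is SystemZ::CCMASK_CMP_NE (== CCValid ^ CCMask); the
// second one just swaps its true/false inputs when the PHIs are created.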
8383 Selects.push_back(&MI);
8384 unsigned Count = 0;
8385 for (MachineInstr &NextMI : llvm::make_range(
8386 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
8387 if (isSelectPseudo(NextMI)) {
8388 assert(NextMI.getOperand(3).getImm() == CCValid &&
8389 "Bad CCValid operands since CC was not redefined.");
8390 if (NextMI.getOperand(4).getImm() == CCMask ||
8391 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
8392 Selects.push_back(&NextMI);
8393 continue;
8394 }
8395 break;
8396 }
8397 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
8398 NextMI.usesCustomInsertionHook())
8399 break;
8400 bool User = false;
8401 for (auto *SelMI : Selects)
8402 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
8403 User = true;
8404 break;
8405 }
8406 if (NextMI.isDebugInstr()) {
8407 if (User) {
8408 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
8409 DbgValues.push_back(&NextMI);
8410 }
8411 } else if (User || ++Count > 20)
8412 break;
8413 }
8414
8415 MachineInstr *LastMI = Selects.back();
8416 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
8417 checkCCKill(*LastMI, MBB));
8418 MachineBasicBlock *StartMBB = MBB;
8420 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8421
8422 // Unless CC was killed in the last Select instruction, mark it as
8423 // live-in to both FalseMBB and JoinMBB.
8424 if (!CCKilled) {
8425 FalseMBB->addLiveIn(SystemZ::CC);
8426 JoinMBB->addLiveIn(SystemZ::CC);
8427 }
8428
8429 // StartMBB:
8430 // BRC CCMask, JoinMBB
8431 // # fallthrough to FalseMBB
8432 MBB = StartMBB;
8433 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8434 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8435 MBB->addSuccessor(JoinMBB);
8436 MBB->addSuccessor(FalseMBB);
8437
8438 // FalseMBB:
8439 // # fallthrough to JoinMBB
8440 MBB = FalseMBB;
8441 MBB->addSuccessor(JoinMBB);
8442
8443 // JoinMBB:
8444 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
8445 // ...
8446 MBB = JoinMBB;
8447 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
8448 for (auto *SelMI : Selects)
8449 SelMI->eraseFromParent();
8450
8452 for (auto *DbgMI : DbgValues)
8453 MBB->splice(InsertPos, StartMBB, DbgMI);
8454
8455 return JoinMBB;
8456}
8457
8458// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
8459// StoreOpcode is the store to use and Invert says whether the store should
8460// happen when the condition is false rather than true. If a STORE ON
8461// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
8462MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
8464 unsigned StoreOpcode,
8465 unsigned STOCOpcode,
8466 bool Invert) const {
8467 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8468
8469 Register SrcReg = MI.getOperand(0).getReg();
8470 MachineOperand Base = MI.getOperand(1);
8471 int64_t Disp = MI.getOperand(2).getImm();
8472 Register IndexReg = MI.getOperand(3).getReg();
8473 unsigned CCValid = MI.getOperand(4).getImm();
8474 unsigned CCMask = MI.getOperand(5).getImm();
8475 DebugLoc DL = MI.getDebugLoc();
8476
8477 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
8478
8479 // ISel pattern matching also adds a load memory operand of the same
8480 // address, so take special care to find the storing memory operand.
8481 MachineMemOperand *MMO = nullptr;
8482 for (auto *I : MI.memoperands())
8483 if (I->isStore()) {
8484 MMO = I;
8485 break;
8486 }
8487
8488 // Use STOCOpcode if possible. We could use different store patterns in
8489 // order to avoid matching the index register, but the performance trade-offs
8490 // might be more complicated in that case.
8491 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
8492 if (Invert)
8493 CCMask ^= CCValid;
8494
8495 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
8496 .addReg(SrcReg)
8497 .add(Base)
8498 .addImm(Disp)
8499 .addImm(CCValid)
8500 .addImm(CCMask)
8501 .addMemOperand(MMO);
8502
8503 MI.eraseFromParent();
8504 return MBB;
8505 }
8506
8507 // Get the condition needed to branch around the store.
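// The BRC built below branches to JoinMBB, around the store, so the branch
// must be taken exactly when the store should not happen; hence the mask is
// complemented for the normal form and left unchanged for the inverted form.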
8508 if (!Invert)
8509 CCMask ^= CCValid;
8510
8511 MachineBasicBlock *StartMBB = MBB;
8513 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8514
8515 // Unless CC was killed in the CondStore instruction, mark it as
8516 // live-in to both FalseMBB and JoinMBB.
8517 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
8518 !checkCCKill(MI, JoinMBB)) {
8519 FalseMBB->addLiveIn(SystemZ::CC);
8520 JoinMBB->addLiveIn(SystemZ::CC);
8521 }
8522
8523 // StartMBB:
8524 // BRC CCMask, JoinMBB
8525 // # fallthrough to FalseMBB
8526 MBB = StartMBB;
8527 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8528 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8529 MBB->addSuccessor(JoinMBB);
8530 MBB->addSuccessor(FalseMBB);
8531
8532 // FalseMBB:
8533 // store %SrcReg, %Disp(%Index,%Base)
8534 // # fallthrough to JoinMBB
8535 MBB = FalseMBB;
8536 BuildMI(MBB, DL, TII->get(StoreOpcode))
8537 .addReg(SrcReg)
8538 .add(Base)
8539 .addImm(Disp)
8540 .addReg(IndexReg)
8541 .addMemOperand(MMO);
8542 MBB->addSuccessor(JoinMBB);
8543
8544 MI.eraseFromParent();
8545 return JoinMBB;
8546}
8547
8548// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
8550SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
8552 bool Unsigned) const {
8553 MachineFunction &MF = *MBB->getParent();
8554 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8556
8557 // Synthetic instruction to compare 128-bit values.
8558 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
8559 Register Op0 = MI.getOperand(0).getReg();
8560 Register Op1 = MI.getOperand(1).getReg();
8561
8562 MachineBasicBlock *StartMBB = MBB;
8564 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
8565
8566 // StartMBB:
8567 //
8568 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
8569 // Swap the inputs to get:
8570 // CC 1 if high(Op0) > high(Op1)
8571 // CC 2 if high(Op0) < high(Op1)
8572 // CC 0 if high(Op0) == high(Op1)
8573 //
8574 // If CC != 0, we're done, so jump over the next instruction.
8575 //
8576 // VEC[L]G Op1, Op0
8577 // JNE JoinMBB
8578 // # fallthrough to HiEqMBB
8579 MBB = StartMBB;
8580 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
8581 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
8582 .addReg(Op1).addReg(Op0);
8583 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8585 MBB->addSuccessor(JoinMBB);
8586 MBB->addSuccessor(HiEqMBB);
8587
8588 // HiEqMBB:
8589 //
8590 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
8591 // Since we already know the high parts are equal, the CC
8592 // result will only depend on the low parts:
8593 // CC 1 if low(Op0) > low(Op1)
8594 // CC 3 if low(Op0) <= low(Op1)
8595 //
8596 // VCHLGS Tmp, Op0, Op1
8597 // # fallthrough to JoinMBB
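// Taken together, the two compares leave CC == 1 exactly when Op0 > Op1: the
// high halves are compared signed or unsigned as requested, and the low
// halves (which only matter when the high halves are equal) are always
// compared unsigned.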
8598 MBB = HiEqMBB;
8599 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
8600 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
8601 .addReg(Op0).addReg(Op1);
8602 MBB->addSuccessor(JoinMBB);
8603
8604 // Mark CC as live-in to JoinMBB.
8605 JoinMBB->addLiveIn(SystemZ::CC);
8606
8607 MI.eraseFromParent();
8608 return JoinMBB;
8609}
8610
8611// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
8612// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
8613 // the binary operation denoted by "*", or 0 for ATOMIC_SWAPW. Invert says
8614// whether the field should be inverted after performing BinOpcode (e.g. for
8615// NAND).
8616MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
8617 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
8618 bool Invert) const {
8619 MachineFunction &MF = *MBB->getParent();
8620 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8622
8623 // Extract the operands. Base can be a register or a frame index.
8624 // Src2 can be a register or immediate.
8625 Register Dest = MI.getOperand(0).getReg();
8626 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8627 int64_t Disp = MI.getOperand(2).getImm();
8628 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
8629 Register BitShift = MI.getOperand(4).getReg();
8630 Register NegBitShift = MI.getOperand(5).getReg();
8631 unsigned BitSize = MI.getOperand(6).getImm();
8632 DebugLoc DL = MI.getDebugLoc();
8633
8634 // Get the right opcodes for the displacement.
8635 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8636 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8637 assert(LOpcode && CSOpcode && "Displacement out of range");
8638
8639 // Create virtual registers for temporary results.
8640 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8641 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8642 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8643 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8644 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8645
8646 // Insert a basic block for the main loop.
8647 MachineBasicBlock *StartMBB = MBB;
8649 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8650
8651 // StartMBB:
8652 // ...
8653 // %OrigVal = L Disp(%Base)
8654 // # fall through to LoopMBB
8655 MBB = StartMBB;
8656 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8657 MBB->addSuccessor(LoopMBB);
8658
8659 // LoopMBB:
8660 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
8661 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8662 // %RotatedNewVal = OP %RotatedOldVal, %Src2
8663 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8664 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8665 // JNE LoopMBB
8666 // # fall through to DoneMBB
8667 MBB = LoopMBB;
8668 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8669 .addReg(OrigVal).addMBB(StartMBB)
8670 .addReg(Dest).addMBB(LoopMBB);
8671 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8672 .addReg(OldVal).addReg(BitShift).addImm(0);
8673 if (Invert) {
8674 // Perform the operation normally and then invert every bit of the field.
8675 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8676 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
8677 // XILF with the upper BitSize bits set.
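// For example, with BitSize == 8 the immediate is -1U << 24 == 0xFF000000,
// so only the 8 bits holding the rotated field are inverted.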
8678 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
8679 .addReg(Tmp).addImm(-1U << (32 - BitSize));
8680 } else if (BinOpcode)
8681 // A simple binary operation.
8682 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
8683 .addReg(RotatedOldVal)
8684 .add(Src2);
8685 else
8686 // Use RISBG to rotate Src2 into position and use it to replace the
8687 // field in RotatedOldVal.
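// For example, with BitSize == 8 this is a RISBG with I3 = 32, I4 = 39 and
// I5 = 24: %Src2 is rotated left by 24 bits so that its low byte lines up
// with bit positions 32-39, and only those positions of %RotatedOldVal are
// replaced.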
8688 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
8689 .addReg(RotatedOldVal).addReg(Src2.getReg())
8690 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
8691 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8692 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8693 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8694 .addReg(OldVal)
8695 .addReg(NewVal)
8696 .add(Base)
8697 .addImm(Disp);
8698 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8700 MBB->addSuccessor(LoopMBB);
8701 MBB->addSuccessor(DoneMBB);
8702
8703 MI.eraseFromParent();
8704 return DoneMBB;
8705}
8706
8707// Implement EmitInstrWithCustomInserter for subword pseudo
8708// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
8709// instruction that should be used to compare the current field with the
8710// minimum or maximum value. KeepOldMask is the BRC condition-code mask
8711// for when the current field should be kept.
8712MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
8713 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
8714 unsigned KeepOldMask) const {
8715 MachineFunction &MF = *MBB->getParent();
8716 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8718
8719 // Extract the operands. Base can be a register or a frame index.
8720 Register Dest = MI.getOperand(0).getReg();
8721 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8722 int64_t Disp = MI.getOperand(2).getImm();
8723 Register Src2 = MI.getOperand(3).getReg();
8724 Register BitShift = MI.getOperand(4).getReg();
8725 Register NegBitShift = MI.getOperand(5).getReg();
8726 unsigned BitSize = MI.getOperand(6).getImm();
8727 DebugLoc DL = MI.getDebugLoc();
8728
8729 // Get the right opcodes for the displacement.
8730 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8731 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8732 assert(LOpcode && CSOpcode && "Displacement out of range");
8733
8734 // Create virtual registers for temporary results.
8735 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8736 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8737 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8738 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8739 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8740 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8741
8742 // Insert 3 basic blocks for the loop.
8743 MachineBasicBlock *StartMBB = MBB;
8745 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8746 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
8747 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
8748
8749 // StartMBB:
8750 // ...
8751 // %OrigVal = L Disp(%Base)
8752 // # fall through to LoopMBB
8753 MBB = StartMBB;
8754 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8755 MBB->addSuccessor(LoopMBB);
8756
8757 // LoopMBB:
8758 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
8759 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8760 // CompareOpcode %RotatedOldVal, %Src2
8761 // BRC KeepOldMask, UpdateMBB
8762 MBB = LoopMBB;
8763 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8764 .addReg(OrigVal).addMBB(StartMBB)
8765 .addReg(Dest).addMBB(UpdateMBB);
8766 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8767 .addReg(OldVal).addReg(BitShift).addImm(0);
8768 BuildMI(MBB, DL, TII->get(CompareOpcode))
8769 .addReg(RotatedOldVal).addReg(Src2);
8770 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8771 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
8772 MBB->addSuccessor(UpdateMBB);
8773 MBB->addSuccessor(UseAltMBB);
8774
8775 // UseAltMBB:
8776 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
8777 // # fall through to UpdateMBB
8778 MBB = UseAltMBB;
8779 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
8780 .addReg(RotatedOldVal).addReg(Src2)
8781 .addImm(32).addImm(31 + BitSize).addImm(0);
8782 MBB->addSuccessor(UpdateMBB);
8783
8784 // UpdateMBB:
8785 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
8786 // [ %RotatedAltVal, UseAltMBB ]
8787 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8788 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8789 // JNE LoopMBB
8790 // # fall through to DoneMBB
8791 MBB = UpdateMBB;
8792 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
8793 .addReg(RotatedOldVal).addMBB(LoopMBB)
8794 .addReg(RotatedAltVal).addMBB(UseAltMBB);
8795 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8796 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8797 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8798 .addReg(OldVal)
8799 .addReg(NewVal)
8800 .add(Base)
8801 .addImm(Disp);
8802 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8804 MBB->addSuccessor(LoopMBB);
8805 MBB->addSuccessor(DoneMBB);
8806
8807 MI.eraseFromParent();
8808 return DoneMBB;
8809}
8810
8811// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
8812// instruction MI.
8814SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
8815 MachineBasicBlock *MBB) const {
8816 MachineFunction &MF = *MBB->getParent();
8817 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8819
8820 // Extract the operands. Base can be a register or a frame index.
8821 Register Dest = MI.getOperand(0).getReg();
8822 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8823 int64_t Disp = MI.getOperand(2).getImm();
8824 Register CmpVal = MI.getOperand(3).getReg();
8825 Register OrigSwapVal = MI.getOperand(4).getReg();
8826 Register BitShift = MI.getOperand(5).getReg();
8827 Register NegBitShift = MI.getOperand(6).getReg();
8828 int64_t BitSize = MI.getOperand(7).getImm();
8829 DebugLoc DL = MI.getDebugLoc();
8830
8831 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
8832
8833 // Get the right opcodes for the displacement and zero-extension.
8834 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8835 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8836 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
8837 assert(LOpcode && CSOpcode && "Displacement out of range");
8838
8839 // Create virtual registers for temporary results.
8840 Register OrigOldVal = MRI.createVirtualRegister(RC);
8841 Register OldVal = MRI.createVirtualRegister(RC);
8842 Register SwapVal = MRI.createVirtualRegister(RC);
8843 Register StoreVal = MRI.createVirtualRegister(RC);
8844 Register OldValRot = MRI.createVirtualRegister(RC);
8845 Register RetryOldVal = MRI.createVirtualRegister(RC);
8846 Register RetrySwapVal = MRI.createVirtualRegister(RC);
8847
8848 // Insert 2 basic blocks for the loop.
8849 MachineBasicBlock *StartMBB = MBB;
8851 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8852 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
8853
8854 // StartMBB:
8855 // ...
8856 // %OrigOldVal = L Disp(%Base)
8857 // # fall through to LoopMBB
8858 MBB = StartMBB;
8859 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
8860 .add(Base)
8861 .addImm(Disp)
8862 .addReg(0);
8863 MBB->addSuccessor(LoopMBB);
8864
8865 // LoopMBB:
8866 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
8867 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
8868 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
8869 // ^^ The low BitSize bits contain the field
8870 // of interest.
8871 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
8872 // ^^ Replace the upper 32-BitSize bits of the
8873 // swap value with those that we loaded and rotated.
8874 // %Dest = LL[CH] %OldValRot
8875 // CR %Dest, %CmpVal
8876 // JNE DoneMBB
8877 // # Fall through to SetMBB
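// For example, with BitSize == 8 the RISBG32 copies bits 32-55 of %OldValRot
// into %RetrySwapVal, so only the low byte (the value being swapped in) still
// comes from %SwapVal, and the LLCR zero-extends that same low byte of
// %OldValRot to produce %Dest for the comparison with %CmpVal.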
8878 MBB = LoopMBB;
8879 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8880 .addReg(OrigOldVal).addMBB(StartMBB)
8881 .addReg(RetryOldVal).addMBB(SetMBB);
8882 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
8883 .addReg(OrigSwapVal).addMBB(StartMBB)
8884 .addReg(RetrySwapVal).addMBB(SetMBB);
8885 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
8886 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
8887 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
8888 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
8889 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
8890 .addReg(OldValRot);
8891 BuildMI(MBB, DL, TII->get(SystemZ::CR))
8892 .addReg(Dest).addReg(CmpVal);
8893 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8896 MBB->addSuccessor(DoneMBB);
8897 MBB->addSuccessor(SetMBB);
8898
8899 // SetMBB:
8900 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
8901 // ^^ Rotate the new field to its proper position.
8902 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
8903 // JNE LoopMBB
8904 // # fall through to ExitMBB
8905 MBB = SetMBB;
8906 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
8907 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
8908 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
8909 .addReg(OldVal)
8910 .addReg(StoreVal)
8911 .add(Base)
8912 .addImm(Disp);
8913 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8915 MBB->addSuccessor(LoopMBB);
8916 MBB->addSuccessor(DoneMBB);
8917
8918 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
8919 // to the block after the loop. At this point, CC may have been defined
8920 // either by the CR in LoopMBB or by the CS in SetMBB.
8921 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
8922 DoneMBB->addLiveIn(SystemZ::CC);
8923
8924 MI.eraseFromParent();
8925 return DoneMBB;
8926}
8927
8928// Emit a move from two GR64s to a GR128.
8930SystemZTargetLowering::emitPair128(MachineInstr &MI,
8931 MachineBasicBlock *MBB) const {
8932 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8933 const DebugLoc &DL = MI.getDebugLoc();
8934
8935 Register Dest = MI.getOperand(0).getReg();
8936 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
8937 .add(MI.getOperand(1))
8938 .addImm(SystemZ::subreg_h64)
8939 .add(MI.getOperand(2))
8940 .addImm(SystemZ::subreg_l64);
8941 MI.eraseFromParent();
8942 return MBB;
8943}
8944
8945// Emit an extension from a GR64 to a GR128. ClearEven is true
8946// if the high register of the GR128 value must be cleared or false if
8947// it's "don't care".
8948MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
8950 bool ClearEven) const {
8951 MachineFunction &MF = *MBB->getParent();
8952 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8954 DebugLoc DL = MI.getDebugLoc();
8955
8956 Register Dest = MI.getOperand(0).getReg();
8957 Register Src = MI.getOperand(1).getReg();
8958 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8959
8960 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
8961 if (ClearEven) {
8962 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8963 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8964
8965 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
8966 .addImm(0);
8967 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
8968 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
8969 In128 = NewIn128;
8970 }
8971 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8972 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
8973
8974 MI.eraseFromParent();
8975 return MBB;
8976}
8977
8979SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
8981 unsigned Opcode, bool IsMemset) const {
8982 MachineFunction &MF = *MBB->getParent();
8983 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8985 DebugLoc DL = MI.getDebugLoc();
8986
8987 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
8988 uint64_t DestDisp = MI.getOperand(1).getImm();
8989 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
8990 uint64_t SrcDisp;
8991
8992 // Fold the displacement Disp if it is out of range.
8993 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
8994 if (!isUInt<12>(Disp)) {
8995 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8996 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
8997 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
8998 .add(Base).addImm(Disp).addReg(0);
8999 Base = MachineOperand::CreateReg(Reg, false);
9000 Disp = 0;
9001 }
9002 };
9003
9004 if (!IsMemset) {
9005 SrcBase = earlyUseOperand(MI.getOperand(2));
9006 SrcDisp = MI.getOperand(3).getImm();
9007 } else {
9008 SrcBase = DestBase;
9009 SrcDisp = DestDisp++;
9010 foldDisplIfNeeded(DestBase, DestDisp);
9011 }
9012
9013 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
9014 bool IsImmForm = LengthMO.isImm();
9015 bool IsRegForm = !IsImmForm;
9016
9017 // Build and insert one Opcode of Length, with special treatment for memset.
9018 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
9020 MachineOperand DBase, uint64_t DDisp,
9022 unsigned Length) -> void {
9023 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
9024 if (IsMemset) {
9025 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
9026 if (ByteMO.isImm())
9027 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
9028 .add(SBase).addImm(SDisp).add(ByteMO);
9029 else
9030 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
9031 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
9032 if (--Length == 0)
9033 return;
9034 }
9035 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
9036 .add(DBase).addImm(DDisp).addImm(Length)
9037 .add(SBase).addImm(SDisp)
9038 .setMemRefs(MI.memoperands());
9039 };
9040
9041 bool NeedsLoop = false;
9042 uint64_t ImmLength = 0;
9043 Register LenAdjReg = SystemZ::NoRegister;
9044 if (IsImmForm) {
9045 ImmLength = LengthMO.getImm();
9046 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
9047 if (ImmLength == 0) {
9048 MI.eraseFromParent();
9049 return MBB;
9050 }
9051 if (Opcode == SystemZ::CLC) {
9052 if (ImmLength > 3 * 256)
9053 // A two-CLC sequence is a clear win over a loop, not least because
9054 // it needs only one branch. A three-CLC sequence needs the same
9055 // number of branches as a loop (i.e. 2), but is shorter. That
9056 // brings us to lengths greater than 768 bytes. It seems relatively
9057 // likely that a difference will be found within the first 768 bytes,
9058 // so we just optimize for the smallest number of branch
9059 // instructions, in order to avoid polluting the prediction buffer
9060 // too much.
9061 NeedsLoop = true;
9062 } else if (ImmLength > 6 * 256)
9063 // The heuristic we use is to prefer loops for anything that would
9064 // require 7 or more MVCs. With these kinds of sizes there isn't much
9065 // to choose between straight-line code and looping code, since the
9066 // time will be dominated by the MVCs themselves.
9067 NeedsLoop = true;
9068 } else {
9069 NeedsLoop = true;
9070 LenAdjReg = LengthMO.getReg();
9071 }
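// For example, an immediate-length CLC of 1024 bytes (four CLCs and three
// extra branches) is emitted as a loop, while a 1024-byte MVC is still
// emitted as four straight-line MVCs.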
9072
9073 // When generating more than one CLC, all but the last will need to
9074 // branch to the end when a difference is found.
9075 MachineBasicBlock *EndMBB =
9076 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
9078 : nullptr);
9079
9080 if (NeedsLoop) {
9081 Register StartCountReg =
9082 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
9083 if (IsImmForm) {
9084 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
9085 ImmLength &= 255;
9086 } else {
9087 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
9088 .addReg(LenAdjReg)
9089 .addReg(0)
9090 .addImm(8);
9091 }
9092
9093 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
9094 auto loadZeroAddress = [&]() -> MachineOperand {
9095 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9096 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
9097 return MachineOperand::CreateReg(Reg, false);
9098 };
9099 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
9100 DestBase = loadZeroAddress();
9101 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
9102 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
9103
9104 MachineBasicBlock *StartMBB = nullptr;
9105 MachineBasicBlock *LoopMBB = nullptr;
9106 MachineBasicBlock *NextMBB = nullptr;
9107 MachineBasicBlock *DoneMBB = nullptr;
9108 MachineBasicBlock *AllDoneMBB = nullptr;
9109
9110 Register StartSrcReg = forceReg(MI, SrcBase, TII);
9111 Register StartDestReg =
9112 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
9113
9114 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
9115 Register ThisSrcReg = MRI.createVirtualRegister(RC);
9116 Register ThisDestReg =
9117 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
9118 Register NextSrcReg = MRI.createVirtualRegister(RC);
9119 Register NextDestReg =
9120 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
9121 RC = &SystemZ::GR64BitRegClass;
9122 Register ThisCountReg = MRI.createVirtualRegister(RC);
9123 Register NextCountReg = MRI.createVirtualRegister(RC);
9124
9125 if (IsRegForm) {
9126 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9127 StartMBB = SystemZ::emitBlockAfter(MBB);
9128 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9129 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9130 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
9131
9132 // MBB:
9133 // # Jump to AllDoneMBB if LenAdjReg encodes a length of 0, or fall thru to StartMBB.
9134 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9135 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
9136 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9138 .addMBB(AllDoneMBB);
9139 MBB->addSuccessor(AllDoneMBB);
9140 if (!IsMemset)
9141 MBB->addSuccessor(StartMBB);
9142 else {
9143 // MemsetOneCheckMBB:
9144 // # Jump to MemsetOneMBB for a memset of length 1, or
9145 // # fall thru to StartMBB.
9146 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
9147 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
9148 MBB->addSuccessor(MemsetOneCheckMBB);
9149 MBB = MemsetOneCheckMBB;
9150 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9151 .addReg(LenAdjReg).addImm(-1);
9152 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9154 .addMBB(MemsetOneMBB);
9155 MBB->addSuccessor(MemsetOneMBB, {10, 100});
9156 MBB->addSuccessor(StartMBB, {90, 100});
9157
9158 // MemsetOneMBB:
9159 // # Jump back to AllDoneMBB after a single MVI or STC.
9160 MBB = MemsetOneMBB;
9161 insertMemMemOp(MBB, MBB->end(),
9162 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
9163 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
9164 1);
9165 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
9166 MBB->addSuccessor(AllDoneMBB);
9167 }
9168
9169 // StartMBB:
9170 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
9171 MBB = StartMBB;
9172 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9173 .addReg(StartCountReg).addImm(0);
9174 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9176 .addMBB(DoneMBB);
9177 MBB->addSuccessor(DoneMBB);
9178 MBB->addSuccessor(LoopMBB);
9179 }
9180 else {
9181 StartMBB = MBB;
9182 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9183 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9184 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9185
9186 // StartMBB:
9187 // # fall through to LoopMBB
9188 MBB->addSuccessor(LoopMBB);
9189
9190 DestBase = MachineOperand::CreateReg(NextDestReg, false);
9191 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
9192 if (EndMBB && !ImmLength)
9193 // If the loop handled the whole CLC range, DoneMBB will be empty with
9194 // CC live-through into EndMBB, so add it as live-in.
9195 DoneMBB->addLiveIn(SystemZ::CC);
9196 }
9197
9198 // LoopMBB:
9199 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
9200 // [ %NextDestReg, NextMBB ]
9201 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
9202 // [ %NextSrcReg, NextMBB ]
9203 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
9204 // [ %NextCountReg, NextMBB ]
9205 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
9206 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
9207 // ( JLH EndMBB )
9208 //
9209 // The prefetch is used only for MVC. The JLH is used only for CLC.
9210 MBB = LoopMBB;
9211 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
9212 .addReg(StartDestReg).addMBB(StartMBB)
9213 .addReg(NextDestReg).addMBB(NextMBB);
9214 if (!HaveSingleBase)
9215 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
9216 .addReg(StartSrcReg).addMBB(StartMBB)
9217 .addReg(NextSrcReg).addMBB(NextMBB);
9218 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
9219 .addReg(StartCountReg).addMBB(StartMBB)
9220 .addReg(NextCountReg).addMBB(NextMBB);
9221 if (Opcode == SystemZ::MVC)
9222 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
9224 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
9225 insertMemMemOp(MBB, MBB->end(),
9226 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
9227 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
9228 if (EndMBB) {
9229 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9231 .addMBB(EndMBB);
9232 MBB->addSuccessor(EndMBB);
9233 MBB->addSuccessor(NextMBB);
9234 }
9235
9236 // NextMBB:
9237 // %NextDestReg = LA 256(%ThisDestReg)
9238 // %NextSrcReg = LA 256(%ThisSrcReg)
9239 // %NextCountReg = AGHI %ThisCountReg, -1
9240 // CGHI %NextCountReg, 0
9241 // JLH LoopMBB
9242 // # fall through to DoneMBB
9243 //
9244 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
9245 MBB = NextMBB;
9246 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
9247 .addReg(ThisDestReg).addImm(256).addReg(0);
9248 if (!HaveSingleBase)
9249 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
9250 .addReg(ThisSrcReg).addImm(256).addReg(0);
9251 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
9252 .addReg(ThisCountReg).addImm(-1);
9253 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9254 .addReg(NextCountReg).addImm(0);
9255 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9257 .addMBB(LoopMBB);
9258 MBB->addSuccessor(LoopMBB);
9259 MBB->addSuccessor(DoneMBB);
9260
9261 MBB = DoneMBB;
9262 if (IsRegForm) {
9263 // DoneMBB:
9264 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
9265 // # Use EXecute Relative Long for the remainder of the bytes. The target
9266 // instruction of the EXRL will have a length field of 1 since 0 is an
9267 // illegal value. The number of bytes processed becomes (%LenAdjReg &
9268 // 0xff) + 1.
9269 // # Fall through to AllDoneMBB.
9270 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9271 Register RemDestReg = HaveSingleBase ? RemSrcReg
9272 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9273 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
9274 .addReg(StartDestReg).addMBB(StartMBB)
9275 .addReg(NextDestReg).addMBB(NextMBB);
9276 if (!HaveSingleBase)
9277 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
9278 .addReg(StartSrcReg).addMBB(StartMBB)
9279 .addReg(NextSrcReg).addMBB(NextMBB);
9280 if (IsMemset)
9281 insertMemMemOp(MBB, MBB->end(),
9282 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
9283 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
9284 MachineInstrBuilder EXRL_MIB =
9285 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
9286 .addImm(Opcode)
9287 .addReg(LenAdjReg)
9288 .addReg(RemDestReg).addImm(DestDisp)
9289 .addReg(RemSrcReg).addImm(SrcDisp);
9290 MBB->addSuccessor(AllDoneMBB);
9291 MBB = AllDoneMBB;
9292 if (Opcode != SystemZ::MVC) {
9293 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
9294 if (EndMBB)
9295 MBB->addLiveIn(SystemZ::CC);
9296 }
9297 }
9299 }
9300
9301 // Handle any remaining bytes with straight-line code.
9302 while (ImmLength > 0) {
9303 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
9304 // The previous iteration might have created out-of-range displacements.
9305 // Apply them using LA/LAY if so.
9306 foldDisplIfNeeded(DestBase, DestDisp);
9307 foldDisplIfNeeded(SrcBase, SrcDisp);
9308 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
9309 DestDisp += ThisLength;
9310 SrcDisp += ThisLength;
9311 ImmLength -= ThisLength;
9312 // If there's another CLC to go, branch to the end if a difference
9313 // was found.
9314 if (EndMBB && ImmLength > 0) {
9316 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9318 .addMBB(EndMBB);
9319 MBB->addSuccessor(EndMBB);
9320 MBB->addSuccessor(NextMBB);
9321 MBB = NextMBB;
9322 }
9323 }
9324 if (EndMBB) {
9325 MBB->addSuccessor(EndMBB);
9326 MBB = EndMBB;
9327 MBB->addLiveIn(SystemZ::CC);
9328 }
9329
9330 MI.eraseFromParent();
9331 return MBB;
9332}
9333
9334// Decompose string pseudo-instruction MI into a loop that continually performs
9335// Opcode until CC != 3.
9336MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
9337 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9338 MachineFunction &MF = *MBB->getParent();
9339 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9341 DebugLoc DL = MI.getDebugLoc();
9342
9343 uint64_t End1Reg = MI.getOperand(0).getReg();
9344 uint64_t Start1Reg = MI.getOperand(1).getReg();
9345 uint64_t Start2Reg = MI.getOperand(2).getReg();
9346 uint64_t CharReg = MI.getOperand(3).getReg();
9347
9348 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
9349 uint64_t This1Reg = MRI.createVirtualRegister(RC);
9350 uint64_t This2Reg = MRI.createVirtualRegister(RC);
9351 uint64_t End2Reg = MRI.createVirtualRegister(RC);
9352
9353 MachineBasicBlock *StartMBB = MBB;
9355 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9356
9357 // StartMBB:
9358 // # fall through to LoopMBB
9359 MBB->addSuccessor(LoopMBB);
9360
9361 // LoopMBB:
9362 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
9363 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
9364 // R0L = %CharReg
9365 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
9366 // JO LoopMBB
9367 // # fall through to DoneMBB
9368 //
9369 // The load of R0L can be hoisted by post-RA LICM.
9370 MBB = LoopMBB;
9371
9372 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
9373 .addReg(Start1Reg).addMBB(StartMBB)
9374 .addReg(End1Reg).addMBB(LoopMBB);
9375 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
9376 .addReg(Start2Reg).addMBB(StartMBB)
9377 .addReg(End2Reg).addMBB(LoopMBB);
9378 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
9379 BuildMI(MBB, DL, TII->get(Opcode))
9380 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
9381 .addReg(This1Reg).addReg(This2Reg);
9382 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9384 MBB->addSuccessor(LoopMBB);
9385 MBB->addSuccessor(DoneMBB);
9386
9387 DoneMBB->addLiveIn(SystemZ::CC);
9388
9389 MI.eraseFromParent();
9390 return DoneMBB;
9391}
9392
9393// Update TBEGIN instruction with final opcode and register clobbers.
9394MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
9395 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
9396 bool NoFloat) const {
9397 MachineFunction &MF = *MBB->getParent();
9398 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9399 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9400
9401 // Update opcode.
9402 MI.setDesc(TII->get(Opcode));
9403
9404 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
9405 // Make sure to add the corresponding GRSM bits if they are missing.
9406 uint64_t Control = MI.getOperand(2).getImm();
9407 static const unsigned GPRControlBit[16] = {
9408 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
9409 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
9410 };
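// Each GRSM bit covers an even/odd register pair, hence the duplicated table
// entries. The r14/r15 bit is always added below so that the stack pointer
// (and return address) are restored if the transaction aborts, and the
// r10/r11 bit is added whenever a frame pointer is in use.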
9411 Control |= GPRControlBit[15];
9412 if (TFI->hasFP(MF))
9413 Control |= GPRControlBit[11];
9414 MI.getOperand(2).setImm(Control);
9415
9416 // Add GPR clobbers.
9417 for (int I = 0; I < 16; I++) {
9418 if ((Control & GPRControlBit[I]) == 0) {
9419 unsigned Reg = SystemZMC::GR64Regs[I];
9420 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9421 }
9422 }
9423
9424 // Add FPR/VR clobbers.
9425 if (!NoFloat && (Control & 4) != 0) {
9426 if (Subtarget.hasVector()) {
9427 for (unsigned Reg : SystemZMC::VR128Regs) {
9428 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9429 }
9430 } else {
9431 for (unsigned Reg : SystemZMC::FP64Regs) {
9432 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9433 }
9434 }
9435 }
9436
9437 return MBB;
9438}
9439
9440MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
9441 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9442 MachineFunction &MF = *MBB->getParent();
9444 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9445 DebugLoc DL = MI.getDebugLoc();
9446
9447 Register SrcReg = MI.getOperand(0).getReg();
9448
9449 // Create new virtual register of the same class as source.
9450 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
9451 Register DstReg = MRI->createVirtualRegister(RC);
9452
9453 // Replace pseudo with a normal load-and-test that models the def as
9454 // well.
9455 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
9456 .addReg(SrcReg)
9457 .setMIFlags(MI.getFlags());
9458 MI.eraseFromParent();
9459
9460 return MBB;
9461}
9462
9463MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
9465 MachineFunction &MF = *MBB->getParent();
9467 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9468 DebugLoc DL = MI.getDebugLoc();
9469 const unsigned ProbeSize = getStackProbeSize(MF);
9470 Register DstReg = MI.getOperand(0).getReg();
9471 Register SizeReg = MI.getOperand(2).getReg();
9472
9473 MachineBasicBlock *StartMBB = MBB;
9475 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
9476 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
9477 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
9478 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
9479
9482
9483 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9484 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9485
9486 // LoopTestMBB
9487 // BRC TailTestMBB
9488 // # fallthrough to LoopBodyMBB
9489 StartMBB->addSuccessor(LoopTestMBB);
9490 MBB = LoopTestMBB;
9491 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
9492 .addReg(SizeReg)
9493 .addMBB(StartMBB)
9494 .addReg(IncReg)
9495 .addMBB(LoopBodyMBB);
9496 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
9497 .addReg(PHIReg)
9498 .addImm(ProbeSize);
9499 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9501 .addMBB(TailTestMBB);
9502 MBB->addSuccessor(LoopBodyMBB);
9503 MBB->addSuccessor(TailTestMBB);
9504
9505 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
9506 // J LoopTestMBB
9507 MBB = LoopBodyMBB;
9508 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
9509 .addReg(PHIReg)
9510 .addImm(ProbeSize);
9511 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
9512 .addReg(SystemZ::R15D)
9513 .addImm(ProbeSize);
9514 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9515 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
9516 .setMemRefs(VolLdMMO);
9517 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
9518 MBB->addSuccessor(LoopTestMBB);
9519
9520 // TailTestMBB
9521 // BRC DoneMBB
9522 // # fallthrough to TailMBB
9523 MBB = TailTestMBB;
9524 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9525 .addReg(PHIReg)
9526 .addImm(0);
9527 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9529 .addMBB(DoneMBB);
9530 MBB->addSuccessor(TailMBB);
9531 MBB->addSuccessor(DoneMBB);
9532
9533 // TailMBB
9534 // # fallthrough to DoneMBB
9535 MBB = TailMBB;
9536 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
9537 .addReg(SystemZ::R15D)
9538 .addReg(PHIReg);
9539 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9540 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
9541 .setMemRefs(VolLdMMO);
9542 MBB->addSuccessor(DoneMBB);
9543
9544 // DoneMBB
9545 MBB = DoneMBB;
9546 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
9547 .addReg(SystemZ::R15D);
9548
9549 MI.eraseFromParent();
9550 return DoneMBB;
9551}
9552
9553SDValue SystemZTargetLowering::
9554getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
9556 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
9557 SDLoc DL(SP);
9558 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
9559 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
9560}
9561
9562MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
9563 MachineInstr &MI, MachineBasicBlock *MBB) const {
9564 switch (MI.getOpcode()) {
9565 case SystemZ::ADJCALLSTACKDOWN:
9566 case SystemZ::ADJCALLSTACKUP:
9567 return emitAdjCallStack(MI, MBB);
9568
9569 case SystemZ::Select32:
9570 case SystemZ::Select64:
9571 case SystemZ::Select128:
9572 case SystemZ::SelectF32:
9573 case SystemZ::SelectF64:
9574 case SystemZ::SelectF128:
9575 case SystemZ::SelectVR32:
9576 case SystemZ::SelectVR64:
9577 case SystemZ::SelectVR128:
9578 return emitSelect(MI, MBB);
9579
9580 case SystemZ::CondStore8Mux:
9581 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
9582 case SystemZ::CondStore8MuxInv:
9583 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
9584 case SystemZ::CondStore16Mux:
9585 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
9586 case SystemZ::CondStore16MuxInv:
9587 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
9588 case SystemZ::CondStore32Mux:
9589 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
9590 case SystemZ::CondStore32MuxInv:
9591 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
9592 case SystemZ::CondStore8:
9593 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
9594 case SystemZ::CondStore8Inv:
9595 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
9596 case SystemZ::CondStore16:
9597 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
9598 case SystemZ::CondStore16Inv:
9599 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
9600 case SystemZ::CondStore32:
9601 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
9602 case SystemZ::CondStore32Inv:
9603 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
9604 case SystemZ::CondStore64:
9605 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
9606 case SystemZ::CondStore64Inv:
9607 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
9608 case SystemZ::CondStoreF32:
9609 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
9610 case SystemZ::CondStoreF32Inv:
9611 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
9612 case SystemZ::CondStoreF64:
9613 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
9614 case SystemZ::CondStoreF64Inv:
9615 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
9616
9617 case SystemZ::SCmp128Hi:
9618 return emitICmp128Hi(MI, MBB, false);
9619 case SystemZ::UCmp128Hi:
9620 return emitICmp128Hi(MI, MBB, true);
9621
9622 case SystemZ::PAIR128:
9623 return emitPair128(MI, MBB);
9624 case SystemZ::AEXT128:
9625 return emitExt128(MI, MBB, false);
9626 case SystemZ::ZEXT128:
9627 return emitExt128(MI, MBB, true);
9628
9629 case SystemZ::ATOMIC_SWAPW:
9630 return emitAtomicLoadBinary(MI, MBB, 0);
9631
9632 case SystemZ::ATOMIC_LOADW_AR:
9633 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
9634 case SystemZ::ATOMIC_LOADW_AFI:
9635 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
9636
9637 case SystemZ::ATOMIC_LOADW_SR:
9638 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
9639
9640 case SystemZ::ATOMIC_LOADW_NR:
9641 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
9642 case SystemZ::ATOMIC_LOADW_NILH:
9643 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
9644
9645 case SystemZ::ATOMIC_LOADW_OR:
9646 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
9647 case SystemZ::ATOMIC_LOADW_OILH:
9648 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
9649
9650 case SystemZ::ATOMIC_LOADW_XR:
9651 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
9652 case SystemZ::ATOMIC_LOADW_XILF:
9653 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
9654
9655 case SystemZ::ATOMIC_LOADW_NRi:
9656 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
9657 case SystemZ::ATOMIC_LOADW_NILHi:
9658 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
9659
9660 case SystemZ::ATOMIC_LOADW_MIN:
9661 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
9662 case SystemZ::ATOMIC_LOADW_MAX:
9663 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
9664 case SystemZ::ATOMIC_LOADW_UMIN:
9665 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
9666 case SystemZ::ATOMIC_LOADW_UMAX:
9667 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
9668
9669 case SystemZ::ATOMIC_CMP_SWAPW:
9670 return emitAtomicCmpSwapW(MI, MBB);
9671 case SystemZ::MVCImm:
9672 case SystemZ::MVCReg:
9673 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
9674 case SystemZ::NCImm:
9675 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
9676 case SystemZ::OCImm:
9677 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
9678 case SystemZ::XCImm:
9679 case SystemZ::XCReg:
9680 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
9681 case SystemZ::CLCImm:
9682 case SystemZ::CLCReg:
9683 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
9684 case SystemZ::MemsetImmImm:
9685 case SystemZ::MemsetImmReg:
9686 case SystemZ::MemsetRegImm:
9687 case SystemZ::MemsetRegReg:
9688 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
9689 case SystemZ::CLSTLoop:
9690 return emitStringWrapper(MI, MBB, SystemZ::CLST);
9691 case SystemZ::MVSTLoop:
9692 return emitStringWrapper(MI, MBB, SystemZ::MVST);
9693 case SystemZ::SRSTLoop:
9694 return emitStringWrapper(MI, MBB, SystemZ::SRST);
9695 case SystemZ::TBEGIN:
9696 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
9697 case SystemZ::TBEGIN_nofloat:
9698 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
9699 case SystemZ::TBEGINC:
9700 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
9701 case SystemZ::LTEBRCompare_Pseudo:
9702 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
9703 case SystemZ::LTDBRCompare_Pseudo:
9704 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
9705 case SystemZ::LTXBRCompare_Pseudo:
9706 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
9707
9708 case SystemZ::PROBED_ALLOCA:
9709 return emitProbedAlloca(MI, MBB);
9710
9711 case TargetOpcode::STACKMAP:
9712 case TargetOpcode::PATCHPOINT:
9713 return emitPatchPoint(MI, MBB);
9714
9715 default:
9716 llvm_unreachable("Unexpected instr type to insert");
9717 }
9718}
9719
9720 // This is only used by the isel schedulers, and is needed only to prevent
9721 // the compiler from crashing when list-ilp is used.
9722const TargetRegisterClass *
9723SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
9724 if (VT == MVT::Untyped)
9725 return &SystemZ::ADDR128BitRegClass;
9726 return TargetLowering::getRepRegClassFor(VT);
9727}
9728
9729SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
9730 SelectionDAG &DAG) const {
9731 SDLoc dl(Op);
9732 /*
9733 The rounding method is in FPC Byte 3 bits 6-7, and has the following
9734 settings:
9735 00 Round to nearest
9736 01 Round to 0
9737 10 Round to +inf
9738 11 Round to -inf
9739
9740 FLT_ROUNDS, on the other hand, expects the following:
9741 -1 Undefined
9742 0 Round to 0
9743 1 Round to nearest
9744 2 Round to +inf
9745 3 Round to -inf
9746 */
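// The computation below, RetVal = (FPC & 3) ^ ((FPC & 3) >> 1) ^ 1, maps each
// FPC encoding onto the expected FLT_ROUNDS value:
//   FPC 0 (nearest) -> 0 ^ 0 ^ 1 == 1
//   FPC 1 (to 0) -> 1 ^ 0 ^ 1 == 0
//   FPC 2 (to +inf) -> 2 ^ 1 ^ 1 == 2
//   FPC 3 (to -inf) -> 3 ^ 1 ^ 1 == 3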
9747
9748 // Save FPC to register.
9749 SDValue Chain = Op.getOperand(0);
9750 SDValue EFPC(
9751 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
9752 Chain = EFPC.getValue(1);
9753
9754 // Transform as necessary
9755 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
9756 DAG.getConstant(3, dl, MVT::i32));
9757 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
9758 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
9759 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
9760 DAG.getConstant(1, dl, MVT::i32)));
9761
9762 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
9763 DAG.getConstant(1, dl, MVT::i32));
9764 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
9765
9766 return DAG.getMergeValues({RetVal, Chain}, dl);
9767}
9768
9769SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
9770 SelectionDAG &DAG) const {
9771 EVT VT = Op.getValueType();
9772 Op = Op.getOperand(0);
9773 EVT OpVT = Op.getValueType();
9774
9775 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
9776
9777 SDLoc DL(Op);
9778
9779 // Load a zero vector for the third operand of VSUM.
9780 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
9781
9782 // Execute the VSUM.
9783 switch (OpVT.getScalarSizeInBits()) {
9784 case 8:
9785 case 16:
9786 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
9787 [[fallthrough]];
9788 case 32:
9789 case 64:
9790 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
9791 DAG.getBitcast(Op.getValueType(), Zero));
9792 break;
9793 case 128:
9794 break; // VSUM over v1i128 should not happen and would be a noop
9795 default:
9796 llvm_unreachable("Unexpected scalar size.");
9797 }
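// For example, reducing a v4i32 vector sums all four words into a single
// 128-bit value whose low-order word holds the total; viewed again as v4i32
// (SystemZ vectors use big-endian element order) that word is element 3,
// which the extract below retrieves.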
9798 // Cast to original vector type, retrieve last element.
9799 return DAG.getNode(
9800 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
9801 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
9802}
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1498
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1470
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1446
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition: APInt.h:300
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
@ Add
*p = old + v
Definition: Instructions.h:712
@ Or
*p = old | v
Definition: Instructions.h:720
@ Sub
*p = old - v
Definition: Instructions.h:714
@ And
*p = old & v
Definition: Instructions.h:716
@ Xor
*p = old ^ v
Definition: Instructions.h:722
BinOp getOperation() const
Definition: Instructions.h:787
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:392
The address of a basic block.
Definition: Constants.h:890
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:146
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:769
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:781
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:281
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:743
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:588
bool hasPrivateLinkage() const
Definition: GlobalValue.h:527
bool hasInternalLinkage() const
Definition: GlobalValue.h:526
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:174
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setMaxCallFrameSize(uint64_t S)
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setReturnAddressIsTaken(bool s)
MachineFunctionProperties & reset(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getMachineOpcode() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:226
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:736
SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:493
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:746
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:842
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:487
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:876
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:488
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:787
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:690
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:782
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:482
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:813
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:859
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:500
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:753
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:570
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void reserve(size_type N)
Definition: SmallVector.h:676
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:290
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:455
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:250
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:669
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
iterator end() const
Definition: StringRef.h:113
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
A SystemZ-specific class detailing special use registers particular for calling conventions.
A SystemZ-specific constant pool value.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const
const TargetFrameLowering * getFrameLowering() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
const SystemZRegisterInfo * getRegisterInfo() const override
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
bool useSoftFloat() const override
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers Particular to z/OS when in 64 bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:261
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:779
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1194
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1190
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:752
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:490
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1337
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:743
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1223
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1339
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1309
@ STRICT_FCEIL
Definition: ISDOpcodes.h:440
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1340
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1099
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
Definition: ISDOpcodes.h:1322
@ STRICT_FMINIMUM
Definition: ISDOpcodes.h:450
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:840
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:716
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1296
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1301
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:870
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:491
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1335
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:953
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1336
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1480
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:464
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1289
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1056
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:980
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1145
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1338
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1124
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:521
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:756
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1305
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:641
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1219
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:444
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1434
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:930
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:673
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:614
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1333
@ STRICT_FMAXIMUM
Definition: ISDOpcodes.h:449
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:438
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:439
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:771
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1341
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1028
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1109
@ ConstantPool
Definition: ISDOpcodes.h:82
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:859
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:848
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:938
@ STRICT_FROUND
Definition: ISDOpcodes.h:442
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:765
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:463
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:441
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1331
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:457
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:479
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:456
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1047
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1332
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:886
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1250
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:484
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1276
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1330
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:919
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:881
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition: ISDOpcodes.h:957
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:437
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1138
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:793
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:507
@ AssertZext
Definition: ISDOpcodes.h:62
@ STRICT_FRINT
Definition: ISDOpcodes.h:436
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1388
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:529
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1603
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1583
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
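As a hedged illustration, a DAG combine commonly uses this predicate to restrict a transform to plain loads; the helper below and its name are assumptions, not code from this file.
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;
// Accept only non-extending, unindexed loads that have a single user.
static bool isSimpleEligibleLoad(SDValue Op) {
  return ISD::isNormalLoad(Op.getNode()) && Op.hasOneUse();
}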
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition: SystemZ.h:40
static bool isImmHH(uint64_t Val)
Definition: SystemZ.h:176
const unsigned CCMASK_TEND
Definition: SystemZ.h:97
const unsigned CCMASK_CS_EQ
Definition: SystemZ.h:67
const unsigned CCMASK_TBEGIN
Definition: SystemZ.h:92
const unsigned CCMASK_0
Definition: SystemZ.h:27
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition: SystemZ.h:82
const unsigned CCMASK_LOGICAL_CARRY
Definition: SystemZ.h:60
const unsigned TDCMASK_NORMAL_MINUS
Definition: SystemZ.h:122
const unsigned CCMASK_TDC
Definition: SystemZ.h:109
const unsigned CCMASK_FCMP
Definition: SystemZ.h:48
const unsigned CCMASK_TM_SOME_0
Definition: SystemZ.h:81
static bool isImmHL(uint64_t Val)
Definition: SystemZ.h:171
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition: SystemZ.h:124
const unsigned PFD_READ
Definition: SystemZ.h:115
const unsigned CCMASK_1
Definition: SystemZ.h:28
const unsigned TDCMASK_NORMAL_PLUS
Definition: SystemZ.h:121
const unsigned PFD_WRITE
Definition: SystemZ.h:116
const unsigned CCMASK_CMP_GT
Definition: SystemZ.h:37
const unsigned TDCMASK_QNAN_MINUS
Definition: SystemZ.h:128
const unsigned CCMASK_CS
Definition: SystemZ.h:69
const unsigned CCMASK_ANY
Definition: SystemZ.h:31
const unsigned CCMASK_ARITH
Definition: SystemZ.h:55
const unsigned CCMASK_TM_MIXED_MSB_0
Definition: SystemZ.h:78
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition: SystemZ.h:123
static bool isImmLL(uint64_t Val)
Definition: SystemZ.h:161
const unsigned VectorBits
Definition: SystemZ.h:154
static bool isImmLH(uint64_t Val)
Definition: SystemZ.h:166
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition: SystemZ.h:125
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition: SystemZ.h:77
const unsigned IPM_CC
Definition: SystemZ.h:112
const unsigned CCMASK_CMP_LE
Definition: SystemZ.h:39
const unsigned CCMASK_CMP_O
Definition: SystemZ.h:44
const unsigned CCMASK_CMP_EQ
Definition: SystemZ.h:35
const unsigned VectorBytes
Definition: SystemZ.h:158
const unsigned TDCMASK_INFINITY_MINUS
Definition: SystemZ.h:126
const unsigned CCMASK_ICMP
Definition: SystemZ.h:47
const unsigned CCMASK_VCMP_ALL
Definition: SystemZ.h:101
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition: SystemZ.h:104
const unsigned CCMASK_TM_MIXED_MSB_1
Definition: SystemZ.h:79
const unsigned CCMASK_TM_MSB_0
Definition: SystemZ.h:83
const unsigned CCMASK_ARITH_OVERFLOW
Definition: SystemZ.h:54
const unsigned CCMASK_CS_NE
Definition: SystemZ.h:68
const unsigned TDCMASK_SNAN_PLUS
Definition: SystemZ.h:129
const unsigned CCMASK_TM
Definition: SystemZ.h:85
const unsigned CCMASK_3
Definition: SystemZ.h:30
const unsigned CCMASK_CMP_LT
Definition: SystemZ.h:36
const unsigned CCMASK_CMP_NE
Definition: SystemZ.h:38
const unsigned TDCMASK_ZERO_PLUS
Definition: SystemZ.h:119
const unsigned TDCMASK_QNAN_PLUS
Definition: SystemZ.h:127
const unsigned TDCMASK_ZERO_MINUS
Definition: SystemZ.h:120
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition: SystemZ.h:80
const unsigned CCMASK_LOGICAL_BORROW
Definition: SystemZ.h:62
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition: SystemZ.h:43
const unsigned CCMASK_LOGICAL
Definition: SystemZ.h:64
const unsigned CCMASK_TM_MSB_1
Definition: SystemZ.h:84
const unsigned TDCMASK_SNAN_MINUS
Definition: SystemZ.h:130
@ GeneralDynamic
Definition: CodeGen.h:46
@ GS
Definition: X86.h:209
Reg
All possible values of the reg field in the ModR/M byte.
support::ulittle32_t Word
Definition: IRSymtab.h:52
NodeAddr< CodeNode * > Code
Definition: RDFGraph.h:388
constexpr const char32_t SBase
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceiling of log base 2 of the specified value; returns 32 if the value is zero.
Definition: MathExtras.h:353
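A few illustrative values for this helper; the numbers below are examples only.
#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;
void checkLog2Ceil() {
  assert(Log2_32_Ceil(1) == 0);
  assert(Log2_32_Ceil(5) == 3);  // rounds up: ceil(log2(5)) == 3
  assert(Log2_32_Ceil(8) == 3);  // exact power of two
  assert(Log2_32_Ceil(0) == 32); // documented special case for zero
}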
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
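A hedged sketch of the builder pattern this interface provides, as a back end's emit* helpers typically use it; every parameter name here is an assumption for illustration, not code from this file.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"
// Create an instruction of the given opcode, insert it before InsertPt in
// MBB, make it define DestReg, and append an immediate operand via the
// returned MachineInstrBuilder.
static void emitImmediateSketch(llvm::MachineBasicBlock &MBB,
                                llvm::MachineBasicBlock::iterator InsertPt,
                                const llvm::DebugLoc &DL,
                                const llvm::TargetInstrInfo &TII,
                                unsigned Opcode, llvm::Register DestReg,
                                int64_t Imm) {
  llvm::BuildMI(MBB, InsertPt, DL, TII.get(Opcode), DestReg).addImm(Imm);
}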
@ Done
Definition: Threading.h:61
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
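A small usage sketch (the container and bounds are examples only): wrap an iterator pair so a sub-range can be used directly in a range-based for loop.
#include "llvm/ADT/iterator_range.h"
#include <vector>
// Sum all elements except the first and the last.
int sumMiddle(const std::vector<int> &V) {
  if (V.size() < 2)
    return 0;
  int Sum = 0;
  for (int X : llvm::make_range(V.begin() + 1, V.end() - 1))
    Sum += X;
  return Sum;
}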
void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert 'Bytes' to a hex string and output to 'OS'.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:215
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant bit, stopping at the first 1.
Definition: bit.h:281
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
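A few illustrative checks for the bit utilities listed above; the concrete values are examples only.
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
using namespace llvm;
void checkBitUtilities() {
  assert(bit_ceil(5u) == 8u);             // smallest power of two >= 5
  assert(countr_zero(0x8u) == 3);         // three trailing zero bits
  assert(countl_zero(uint32_t(1)) == 31); // 31 leading zeros in a 32-bit value
  assert(isPowerOf2_32(64));              // 64 is a power of two > 0
}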
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ Mul
Product of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:581
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
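Illustrative uses of SignExtend64 and bit_floor; the values are examples only.
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;
void checkSignExtendAndFloor() {
  // Interpret the low 16 bits as a signed halfword: 0xFFFF becomes -1.
  assert(SignExtend64<16>(0xFFFFu) == -1);
  assert(SignExtend64<16>(0x7FFFu) == 0x7FFF); // sign bit (bit 15) is clear
  // Largest power of two not exceeding 5 is 4.
  assert(bit_floor(5u) == 4u);
}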
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define EQ(a, b)
Definition: regexec.c:112
#define NC
Definition: regutils.h:42
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:381
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:359
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:371
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:307
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:367
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:314
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:204
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:239
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:319
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:327
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
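A brief illustration of the EVT queries listed above; the chosen types are examples, though <4 x i32> does match a 128-bit SystemZ vector register.
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>
using namespace llvm;
void evtSketch(LLVMContext &Ctx) {
  EVT VecTy = EVT::getVectorVT(Ctx, MVT::i32, 4); // <4 x i32>
  assert(VecTy.isVector() && VecTy.getVectorNumElements() == 4);
  assert(VecTy.getVectorElementType() == MVT::i32);
  assert(VecTy.getFixedSizeInBits() == 128);
  EVT IntTy = EVT::getIntegerVT(Ctx, 128);        // scalar i128
  assert(IntTy.isScalarInteger() && !IntTy.isFloatingPoint());
}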
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:175
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:161
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:70
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:300
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:169
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:134
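A short sketch of the KnownBits queries listed above; the bit patterns are examples only.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
#include <cassert>
using namespace llvm;
void knownBitsSketch() {
  KnownBits Known(8);              // an 8-bit value, nothing known yet
  Known.Zero.setHighBits(4);       // now the top four bits are known zero
  KnownBits Wide = Known.zext(16); // zero extension keeps the new bits at 0
  APInt Max = Wide.getMaxValue();  // largest value consistent with the facts
  assert(Max == 0x0F);
  (void)Max;
}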
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
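A hedged sketch of how these factory functions typically feed into a MachineMemOperand during lowering; the helper name, size, and alignment below are illustrative assumptions, not code from this file.
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/Support/Alignment.h"
#include <cstdint>
// Describe a load of 'Size' bytes from the stack slot at frame index FI.
llvm::MachineMemOperand *makeSpillSlotMMOSketch(llvm::MachineFunction &MF,
                                                int FI, uint64_t Size) {
  return MF.getMachineMemOperand(
      llvm::MachinePointerInfo::getFixedStack(MF, FI),
      llvm::MachineMemOperand::MOLoad, Size, llvm::Align(8));
}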
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
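A hedged sketch of the setter chain these members form when a target lowers a simple call; the wrapper function and its inputs are assumptions for illustration, not code from this file.
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include <utility>
static std::pair<llvm::SDValue, llvm::SDValue>
lowerSimpleCallSketch(const llvm::TargetLowering &TLI, llvm::SelectionDAG &DAG,
                      const llvm::SDLoc &DL, llvm::SDValue Chain,
                      llvm::SDValue Callee, llvm::Type *RetTy,
                      llvm::TargetLowering::ArgListTy &&Args) {
  llvm::TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setCallee(llvm::CallingConv::C, RetTy, Callee, std::move(Args))
      .setDiscardResult(false);
  // Returns {call result, output chain}.
  return TLI.LowerCallTo(CLI);
}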