1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
24#include "llvm/IR/Intrinsics.h"
25#include "llvm/IR/IntrinsicsS390.h"
29#include <cctype>
30#include <optional>
31
32using namespace llvm;
33
34#define DEBUG_TYPE "systemz-lower"
35
36namespace {
37// Represents information about a comparison.
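// For example, for an integer "equal" comparison, Opcode would be the
// integer compare opcode, CCValid the full set of CC values that compare
// can produce, and CCMask the subset for which the original condition
// (equality) holds.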
38struct Comparison {
39 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
40 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
41 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
42
43 // The operands to the comparison.
44 SDValue Op0, Op1;
45
46 // Chain if this is a strict floating-point comparison.
47 SDValue Chain;
48
49 // The opcode that should be used to compare Op0 and Op1.
50 unsigned Opcode;
51
52 // A SystemZICMP value. Only used for integer comparisons.
53 unsigned ICmpType;
54
55 // The mask of CC values that Opcode can produce.
56 unsigned CCValid;
57
58 // The mask of CC values for which the original condition is true.
59 unsigned CCMask;
60};
61} // end anonymous namespace
62
63// Classify VT as either 32 or 64 bit.
64static bool is32Bit(EVT VT) {
65 switch (VT.getSimpleVT().SimpleTy) {
66 case MVT::i32:
67 return true;
68 case MVT::i64:
69 return false;
70 default:
71 llvm_unreachable("Unsupported type");
72 }
73}
74
75// Return a version of MachineOperand that can be safely used before the
76// final use.
78 if (Op.isReg())
79 Op.setIsKill(false);
80 return Op;
81}
82
84 const SystemZSubtarget &STI)
85 : TargetLowering(TM), Subtarget(STI) {
86 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
87
88 auto *Regs = STI.getSpecialRegisters();
89
90 // Set up the register classes.
91 if (Subtarget.hasHighWord())
92 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
93 else
94 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
95 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
96 if (!useSoftFloat()) {
97 if (Subtarget.hasVector()) {
98 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
99 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
100 } else {
101 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
102 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
103 }
104 if (Subtarget.hasVectorEnhancements1())
105 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
106 else
107 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
108
109 if (Subtarget.hasVector()) {
110 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
111 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
112 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
113 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
114 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
115 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
116 }
117
118 if (Subtarget.hasVector())
119 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
120 }
121
122 // Compute derived properties from the register classes
124
125 // Set up special registers.
126 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
127
128 // TODO: It may be better to default to latency-oriented scheduling; however,
129 // LLVM's current latency-oriented scheduler can't handle physreg definitions
130 // such as SystemZ has with CC, so set this to the register-pressure
131 // scheduler, because it can.
133
136
138
139 // Instructions are strings of 2-byte aligned 2-byte values.
141 // For performance reasons we prefer 16-byte alignment.
143
144 // Handle operations that are handled in a similar way for all types.
145 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
146 I <= MVT::LAST_FP_VALUETYPE;
147 ++I) {
149 if (isTypeLegal(VT)) {
150 // Lower SET_CC into an IPM-based sequence.
154
155 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
157
158 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
161 }
162 }
163
164 // Expand jump table branches as address arithmetic followed by an
165 // indirect jump.
167
168 // Expand BRCOND into a BR_CC (see above).
170
171 // Handle integer types except i128.
172 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
173 I <= MVT::LAST_INTEGER_VALUETYPE;
174 ++I) {
176 if (isTypeLegal(VT) && VT != MVT::i128) {
178
179 // Expand individual DIV and REMs into DIVREMs.
186
187 // Support addition/subtraction with overflow.
190
191 // Support addition/subtraction with carry.
194
195 // Support carry in as value rather than glue.
198
199 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
200 // available, or if the operand is constant.
202
203 // Use POPCNT on z196 and above.
204 if (Subtarget.hasPopulationCount())
206 else
208
209 // No special instructions for these.
212
213 // Use *MUL_LOHI where possible instead of MULH*.
218
219 // Only z196 and above have native support for conversions to unsigned.
220 // On z10, promoting to i64 doesn't generate an inexact condition for
221 // values that are outside the i32 range but in the i64 range, so use
222 // the default expansion.
223 if (!Subtarget.hasFPExtension())
225
226 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
227 // default to Expand, so they need to be modified to Legal where appropriate.
229 if (Subtarget.hasFPExtension())
231
232 // And similarly for STRICT_[SU]INT_TO_FP.
234 if (Subtarget.hasFPExtension())
236 }
237 }
238
239 // Handle i128 if legal.
240 if (isTypeLegal(MVT::i128)) {
241 // No special instructions for these.
257
258 // Support addition/subtraction with carry.
263
264 // Use VPOPCT and add up partial results.
266
267 // We have to use libcalls for these.
276 }
277
278 // Type legalization will convert 8- and 16-bit atomic operations into
279 // forms that operate on i32s (but still keeping the original memory VT).
280 // Lower them into full i32 operations.
292
293 // Whether or not i128 is a legal type, we need to custom lower
294 // the atomic operations in order to exploit SystemZ instructions.
297
298 // Mark sign/zero extending atomic loads as legal, which will make
299 // DAGCombiner fold extensions into atomic loads if possible.
301 {MVT::i8, MVT::i16, MVT::i32}, Legal);
303 {MVT::i8, MVT::i16}, Legal);
305 MVT::i8, Legal);
306
307 // We can use the CC result of compare-and-swap to implement
308 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
312
314
315 // Traps are legal, as we will convert them to "j .+2".
316 setOperationAction(ISD::TRAP, MVT::Other, Legal);
317
318 // z10 has instructions for signed but not unsigned FP conversion.
319 // Handle unsigned 32-bit types as signed 64-bit types.
320 if (!Subtarget.hasFPExtension()) {
325 }
326
327 // We have native support for a 64-bit CTLZ, via FLOGR.
331
332 // On z15 we have native support for a 64-bit CTPOP.
333 if (Subtarget.hasMiscellaneousExtensions3()) {
336 }
337
338 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
340
341 // Expand 128 bit shifts without using a libcall.
345 setLibcallName(RTLIB::SRL_I128, nullptr);
346 setLibcallName(RTLIB::SHL_I128, nullptr);
347 setLibcallName(RTLIB::SRA_I128, nullptr);
348
349 // Also expand 256 bit shifts if i128 is a legal type.
350 if (isTypeLegal(MVT::i128)) {
354 }
355
356 // Handle bitcast from fp128 to i128.
357 if (!isTypeLegal(MVT::i128))
359
360 // We have native instructions for i8, i16 and i32 extensions, but not i1.
362 for (MVT VT : MVT::integer_valuetypes()) {
366 }
367
368 // Handle the various types of symbolic address.
374
375 // We need to handle dynamic allocations specially because of the
376 // 160-byte area at the bottom of the stack.
379
382
383 // Handle prefetches with PFD or PFDRL.
385
386 // Handle readcyclecounter with STCKF.
388
390 // Assume by default that all vector operations need to be expanded.
391 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
392 if (getOperationAction(Opcode, VT) == Legal)
393 setOperationAction(Opcode, VT, Expand);
394
395 // Likewise all truncating stores and extending loads.
396 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
397 setTruncStoreAction(VT, InnerVT, Expand);
400 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
401 }
402
403 if (isTypeLegal(VT)) {
404 // These operations are legal for anything that can be stored in a
405 // vector register, even if there is no native support for the format
406 // as such. In particular, we can do these for v4f32 even though there
407 // are no specific instructions for that format.
413
414 // Likewise, except that we need to replace the nodes with something
415 // more specific.
418 }
419 }
420
421 // Handle integer vector types.
423 if (isTypeLegal(VT)) {
424 // These operations have direct equivalents.
429 if (VT != MVT::v2i64)
435 if (Subtarget.hasVectorEnhancements1())
437 else
441
442 // Convert a GPR scalar to a vector by inserting it into element 0.
444
445 // Use a series of unpacks for extensions.
448
449 // Detect shifts/rotates by a scalar amount and convert them into
450 // V*_BY_SCALAR.
455
456 // Add ISD::VECREDUCE_ADD as custom in order to implement
457 // it with VZERO+VSUM
459
460 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
461 // and inverting the result as necessary.
463 }
464 }
465
466 if (Subtarget.hasVector()) {
467 // There should be no need to check for float types other than v2f64
468 // since <2 x f32> isn't a legal type.
477
486 }
487
488 if (Subtarget.hasVectorEnhancements2()) {
497
506 }
507
508 // Handle floating-point types.
509 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
510 I <= MVT::LAST_FP_VALUETYPE;
511 ++I) {
513 if (isTypeLegal(VT)) {
514 // We can use FI for FRINT.
516
517 // We can use the extended form of FI for other rounding operations.
518 if (Subtarget.hasFPExtension()) {
524 }
525
526 // No special instructions for these.
532
533 // Special treatment.
535
536 // Handle constrained floating-point operations.
546 if (Subtarget.hasFPExtension()) {
552 }
553 }
554 }
555
556 // Handle floating-point vector types.
557 if (Subtarget.hasVector()) {
558 // Scalar-to-vector conversion is just a subreg.
561
562 // Some insertions and extractions can be done directly but others
563 // need to go via integers.
568
569 // These operations have direct equivalents.
570 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
571 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
572 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
573 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
574 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
575 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
576 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
577 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
578 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
581 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
584
585 // Handle constrained floating-point operations.
598
603 if (Subtarget.hasVectorEnhancements1()) {
606 }
607 }
608
609 // The vector enhancements facility 1 has instructions for these.
610 if (Subtarget.hasVectorEnhancements1()) {
611 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
612 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
613 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
614 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
615 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
616 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
617 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
618 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
619 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
622 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
625
630
635
640
645
650
651 // Handle constrained floating-point operations.
664 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
665 MVT::v4f32, MVT::v2f64 }) {
670 }
671 }
672
673 // We only have fused f128 multiply-addition on vector registers.
674 if (!Subtarget.hasVectorEnhancements1()) {
677 }
678
679 // We don't have a copysign instruction on vector registers.
680 if (Subtarget.hasVectorEnhancements1())
682
683 // Needed so that we don't try to implement f128 constant loads using
684 // a load-and-extend of an f80 constant (in cases where the constant
685 // would fit in an f80).
686 for (MVT VT : MVT::fp_valuetypes())
687 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
688
689 // We don't have extending load instructions on vector registers.
690 if (Subtarget.hasVectorEnhancements1()) {
691 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
692 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
693 }
694
695 // Floating-point truncation and stores need to be done separately.
696 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
697 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
698 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
699
700 // We have 64-bit FPR<->GPR moves, but need special handling for
701 // 32-bit forms.
702 if (!Subtarget.hasVector()) {
705 }
706
707 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
708 // structure, but VAEND is a no-op.
712
714
715 // Codes for which we want to perform some z-specific combinations.
719 ISD::LOAD,
730 ISD::SDIV,
731 ISD::UDIV,
732 ISD::SREM,
733 ISD::UREM,
736
737 // Handle intrinsics.
740
741 // We want to use MVC in preference to even a single load/store pair.
742 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
744
745 // The main memset sequence is a byte store followed by an MVC.
746 // Two STC or MV..I stores win over that, but the kind of fused stores
747 // generated by target-independent code don't when the byte value is
748 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
749 // than "STC;MVC". Handle the choice in target-specific code instead.
750 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
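// (A limit of 0, used when there is no vector facility, means that even the
// smallest memcpy/memset is left to the target-specific lowering instead of
// being expanded into individual loads and stores.)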
752
753 // Default to having -disable-strictnode-mutation on
754 IsStrictFPEnabled = true;
755
756 if (Subtarget.isTargetzOS()) {
757 struct RTLibCallMapping {
758 RTLIB::Libcall Code;
759 const char *Name;
760 };
761 static RTLibCallMapping RTLibCallCommon[] = {
762#define HANDLE_LIBCALL(code, name) {RTLIB::code, name},
763#include "ZOSLibcallNames.def"
764 };
765 for (auto &E : RTLibCallCommon)
766 setLibcallName(E.Code, E.Name);
767 }
768}
769
771 return Subtarget.hasSoftFloat();
772}
773
775 LLVMContext &, EVT VT) const {
776 if (!VT.isVector())
777 return MVT::i32;
779}
780
782 const MachineFunction &MF, EVT VT) const {
783 VT = VT.getScalarType();
784
785 if (!VT.isSimple())
786 return false;
787
788 switch (VT.getSimpleVT().SimpleTy) {
789 case MVT::f32:
790 case MVT::f64:
791 return true;
792 case MVT::f128:
793 return Subtarget.hasVectorEnhancements1();
794 default:
795 break;
796 }
797
798 return false;
799}
800
801// Return true if the constant can be generated with a vector instruction,
802// such as VGM, VGMB or VREPI.
804 const SystemZSubtarget &Subtarget) {
805 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
806 if (!Subtarget.hasVector() ||
807 (isFP128 && !Subtarget.hasVectorEnhancements1()))
808 return false;
809
810 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
811 // preferred way of creating all-zero and all-one vectors so give it
812 // priority over other methods below.
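// Each set bit in Mask marks a byte of the constant that is all ones; a byte
// that is neither 0x00 nor 0xff makes VGBM unusable, so we fall through to
// the other strategies below.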
813 unsigned Mask = 0;
814 unsigned I = 0;
815 for (; I < SystemZ::VectorBytes; ++I) {
816 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
817 if (Byte == 0xff)
818 Mask |= 1ULL << I;
819 else if (Byte != 0)
820 break;
821 }
822 if (I == SystemZ::VectorBytes) {
824 OpVals.push_back(Mask);
826 return true;
827 }
828
829 if (SplatBitSize > 64)
830 return false;
831
832 auto tryValue = [&](uint64_t Value) -> bool {
833 // Try VECTOR REPLICATE IMMEDIATE
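// (VREPI replicates a 16-bit signed immediate into every element of the
// chosen element size, hence the isInt<16> check on the sign-extended value.)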
834 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
835 if (isInt<16>(SignedValue)) {
836 OpVals.push_back(((unsigned) SignedValue));
839 SystemZ::VectorBits / SplatBitSize);
840 return true;
841 }
842 // Try VECTOR GENERATE MASK
843 unsigned Start, End;
844 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
845 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
846 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
847 // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
848 OpVals.push_back(Start - (64 - SplatBitSize));
849 OpVals.push_back(End - (64 - SplatBitSize));
852 SystemZ::VectorBits / SplatBitSize);
853 return true;
854 }
855 return false;
856 };
857
858 // First try assuming that any undefined bits above the highest set bit
859 // and below the lowest set bit are 1s. This increases the likelihood of
860 // being able to use a sign-extended element value in VECTOR REPLICATE
861 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
862 uint64_t SplatBitsZ = SplatBits.getZExtValue();
863 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
864 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
865 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
866 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
867 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
868 if (tryValue(SplatBitsZ | Upper | Lower))
869 return true;
870
871 // Now try assuming that any undefined bits between the first and
872 // last defined set bits are set. This increases the chances of
873 // using a non-wraparound mask.
874 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
875 return tryValue(SplatBitsZ | Middle);
876}
877
879 if (IntImm.isSingleWord()) {
880 IntBits = APInt(128, IntImm.getZExtValue());
881 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
882 } else
883 IntBits = IntImm;
884 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
885
886 // Find the smallest splat.
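// For example, a 128-bit constant consisting of the 16-bit pattern 0x0001
// repeated eight times reduces to SplatBits = 0x0001 with SplatBitSize = 16;
// the loop stops once the two halves differ or the width would drop below 8.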
887 SplatBits = IntImm;
888 unsigned Width = SplatBits.getBitWidth();
889 while (Width > 8) {
890 unsigned HalfSize = Width / 2;
891 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
892 APInt LowValue = SplatBits.trunc(HalfSize);
893
894 // If the two halves do not match, stop here.
895 if (HighValue != LowValue || 8 > HalfSize)
896 break;
897
898 SplatBits = HighValue;
899 Width = HalfSize;
900 }
901 SplatUndef = 0;
902 SplatBitSize = Width;
903}
904
906 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
907 bool HasAnyUndefs;
908
909 // Get IntBits by finding the 128 bit splat.
910 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
911 true);
912
913 // Get SplatBits by finding the 8 bit or greater splat.
914 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
915 true);
916}
917
919 bool ForCodeSize) const {
920 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
921 if (Imm.isZero() || Imm.isNegZero())
922 return true;
923
925}
926
927/// Returns true if stack probing through inline assembly is requested.
929 // If the function specifically requests inline stack probes, emit them.
930 if (MF.getFunction().hasFnAttribute("probe-stack"))
931 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
932 "inline-asm";
933 return false;
934}
935
938 // Lower fp128 the same way as i128.
939 if (LI->getType()->isFP128Ty())
942}
943
946 // Lower fp128 the same way as i128.
947 if (SI->getValueOperand()->getType()->isFP128Ty())
950}
951
954 // Don't expand subword operations as they require special treatment.
955 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
957
958 // Don't expand if there is a target instruction available.
959 if (Subtarget.hasInterlockedAccess1() &&
960 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
967
969}
970
972 // We can use CGFI or CLGFI.
973 return isInt<32>(Imm) || isUInt<32>(Imm);
974}
975
977 // We can use ALGFI or SLGFI.
978 return isUInt<32>(Imm) || isUInt<32>(-Imm);
979}
980
982 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
983 // Unaligned accesses should never be slower than the expanded version.
984 // We check specifically for aligned accesses in the few cases where
985 // they are required.
986 if (Fast)
987 *Fast = 1;
988 return true;
989}
990
991// Information about the addressing mode for a memory access.
993 // True if a long displacement is supported.
995
996 // True if use of index register is supported.
998
999 AddressingMode(bool LongDispl, bool IdxReg) :
1000 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
1001};
1002
1003 // Return the desired addressing mode for a Load whose only use (in the
1004 // same block) is a Store.
1006 Type *Ty) {
1007 // With vector support a Load->Store combination may be combined to either
1008 // an MVC or vector operations and it seems to work best to allow the
1009 // vector addressing mode.
1010 if (HasVector)
1011 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1012
1013 // Otherwise only the MVC case is special.
1014 bool MVC = Ty->isIntegerTy(8);
1015 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1016}
1017
1018// Return the addressing mode which seems most desirable given an LLVM
1019// Instruction pointer.
1020static AddressingMode
1022 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1023 switch (II->getIntrinsicID()) {
1024 default: break;
1025 case Intrinsic::memset:
1026 case Intrinsic::memmove:
1027 case Intrinsic::memcpy:
1028 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1029 }
1030 }
1031
1032 if (isa<LoadInst>(I) && I->hasOneUse()) {
1033 auto *SingleUser = cast<Instruction>(*I->user_begin());
1034 if (SingleUser->getParent() == I->getParent()) {
1035 if (isa<ICmpInst>(SingleUser)) {
1036 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1037 if (C->getBitWidth() <= 64 &&
1038 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1039 // Comparison of memory with 16 bit signed / unsigned immediate
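// (such compares typically become SIL-format compare-immediate instructions
// like CHSI or CLFHSI, which take no index register and only a short
// displacement, hence the restricted addressing mode)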
1040 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1041 } else if (isa<StoreInst>(SingleUser))
1042 // Load->Store
1043 return getLoadStoreAddrMode(HasVector, I->getType());
1044 }
1045 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1046 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1047 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1048 // Load->Store
1049 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1050 }
1051
1052 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1053
1054 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1055 // dependencies (LDE only supports small offsets).
1056 // * Utilize the vector registers to hold floating point
1057 // values (vector load / store instructions only support small
1058 // offsets).
1059
1060 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1061 I->getOperand(0)->getType());
1062 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1063 bool IsVectorAccess = MemAccessTy->isVectorTy();
1064
1065 // A store of an extracted vector element will be combined into a VSTE type
1066 // instruction.
1067 if (!IsVectorAccess && isa<StoreInst>(I)) {
1068 Value *DataOp = I->getOperand(0);
1069 if (isa<ExtractElementInst>(DataOp))
1070 IsVectorAccess = true;
1071 }
1072
1073 // A load which gets inserted into a vector element will be combined into a
1074 // VLE type instruction.
1075 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1076 User *LoadUser = *I->user_begin();
1077 if (isa<InsertElementInst>(LoadUser))
1078 IsVectorAccess = true;
1079 }
1080
1081 if (IsFPAccess || IsVectorAccess)
1082 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1083 }
1084
1085 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1086}
1087
1089 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1090 // Punt on globals for now, although they can be used in limited
1091 // RELATIVE LONG cases.
1092 if (AM.BaseGV)
1093 return false;
1094
1095 // Require a 20-bit signed offset.
1096 if (!isInt<20>(AM.BaseOffs))
1097 return false;
1098
1099 bool RequireD12 =
1100 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1101 AddressingMode SupportedAM(!RequireD12, true);
1102 if (I != nullptr)
1103 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1104
1105 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1106 return false;
1107
1108 if (!SupportedAM.IndexReg)
1109 // No indexing allowed.
1110 return AM.Scale == 0;
1111 else
1112 // Indexing is OK but no scale factor can be applied.
1113 return AM.Scale == 0 || AM.Scale == 1;
1114}
1115
1117 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
1118 unsigned SrcAS, const AttributeList &FuncAttributes) const {
1119 const int MVCFastLen = 16;
1120
1121 if (Limit != ~unsigned(0)) {
1122 // Don't expand Op into scalar loads/stores in these cases:
1123 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1124 return false; // Small memcpy: Use MVC
1125 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1126 return false; // Small memset (first byte with STC/MVI): Use MVC
1127 if (Op.isZeroMemset())
1128 return false; // Memset zero: Use XC
1129 }
1130
1131 return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
1132 SrcAS, FuncAttributes);
1133}
1134
1136 const AttributeList &FuncAttributes) const {
1137 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1138}
1139
1140bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1141 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1142 return false;
1143 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1144 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1145 return FromBits > ToBits;
1146}
1147
1149 if (!FromVT.isInteger() || !ToVT.isInteger())
1150 return false;
1151 unsigned FromBits = FromVT.getFixedSizeInBits();
1152 unsigned ToBits = ToVT.getFixedSizeInBits();
1153 return FromBits > ToBits;
1154}
1155
1156//===----------------------------------------------------------------------===//
1157// Inline asm support
1158//===----------------------------------------------------------------------===//
1159
1162 if (Constraint.size() == 1) {
1163 switch (Constraint[0]) {
1164 case 'a': // Address register
1165 case 'd': // Data register (equivalent to 'r')
1166 case 'f': // Floating-point register
1167 case 'h': // High-part register
1168 case 'r': // General-purpose register
1169 case 'v': // Vector register
1170 return C_RegisterClass;
1171
1172 case 'Q': // Memory with base and unsigned 12-bit displacement
1173 case 'R': // Likewise, plus an index
1174 case 'S': // Memory with base and signed 20-bit displacement
1175 case 'T': // Likewise, plus an index
1176 case 'm': // Equivalent to 'T'.
1177 return C_Memory;
1178
1179 case 'I': // Unsigned 8-bit constant
1180 case 'J': // Unsigned 12-bit constant
1181 case 'K': // Signed 16-bit constant
1182 case 'L': // Signed 20-bit displacement (on all targets we support)
1183 case 'M': // 0x7fffffff
1184 return C_Immediate;
1185
1186 default:
1187 break;
1188 }
1189 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1190 switch (Constraint[1]) {
1191 case 'Q': // Address with base and unsigned 12-bit displacement
1192 case 'R': // Likewise, plus an index
1193 case 'S': // Address with base and signed 20-bit displacement
1194 case 'T': // Likewise, plus an index
1195 return C_Address;
1196
1197 default:
1198 break;
1199 }
1200 }
1201 return TargetLowering::getConstraintType(Constraint);
1202}
1203
1206 const char *constraint) const {
1208 Value *CallOperandVal = info.CallOperandVal;
1209 // If we don't have a value, we can't do a match,
1210 // but allow it at the lowest weight.
1211 if (!CallOperandVal)
1212 return CW_Default;
1213 Type *type = CallOperandVal->getType();
1214 // Look at the constraint type.
1215 switch (*constraint) {
1216 default:
1218 break;
1219
1220 case 'a': // Address register
1221 case 'd': // Data register (equivalent to 'r')
1222 case 'h': // High-part register
1223 case 'r': // General-purpose register
1224 weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1225 break;
1226
1227 case 'f': // Floating-point register
1228 if (!useSoftFloat())
1229 weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1230 break;
1231
1232 case 'v': // Vector register
1233 if (Subtarget.hasVector())
1234 weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1235 : CW_Default;
1236 break;
1237
1238 case 'I': // Unsigned 8-bit constant
1239 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1240 if (isUInt<8>(C->getZExtValue()))
1241 weight = CW_Constant;
1242 break;
1243
1244 case 'J': // Unsigned 12-bit constant
1245 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1246 if (isUInt<12>(C->getZExtValue()))
1247 weight = CW_Constant;
1248 break;
1249
1250 case 'K': // Signed 16-bit constant
1251 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1252 if (isInt<16>(C->getSExtValue()))
1253 weight = CW_Constant;
1254 break;
1255
1256 case 'L': // Signed 20-bit displacement (on all targets we support)
1257 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1258 if (isInt<20>(C->getSExtValue()))
1259 weight = CW_Constant;
1260 break;
1261
1262 case 'M': // 0x7fffffff
1263 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1264 if (C->getZExtValue() == 0x7fffffff)
1265 weight = CW_Constant;
1266 break;
1267 }
1268 return weight;
1269}
1270
1271// Parse a "{tNNN}" register constraint for which the register type "t"
1272// has already been verified. MC is the class associated with "t" and
1273// Map maps 0-based register numbers to LLVM register numbers.
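// For example, a "{r5}" constraint matched against the GR64 register map
// would yield the 64-bit register R5D.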
1274static std::pair<unsigned, const TargetRegisterClass *>
1276 const unsigned *Map, unsigned Size) {
1277 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1278 if (isdigit(Constraint[2])) {
1279 unsigned Index;
1280 bool Failed =
1281 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1282 if (!Failed && Index < Size && Map[Index])
1283 return std::make_pair(Map[Index], RC);
1284 }
1285 return std::make_pair(0U, nullptr);
1286}
1287
1288std::pair<unsigned, const TargetRegisterClass *>
1290 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1291 if (Constraint.size() == 1) {
1292 // GCC Constraint Letters
1293 switch (Constraint[0]) {
1294 default: break;
1295 case 'd': // Data register (equivalent to 'r')
1296 case 'r': // General-purpose register
1297 if (VT.getSizeInBits() == 64)
1298 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1299 else if (VT.getSizeInBits() == 128)
1300 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1301 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1302
1303 case 'a': // Address register
1304 if (VT == MVT::i64)
1305 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1306 else if (VT == MVT::i128)
1307 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1308 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1309
1310 case 'h': // High-part register (an LLVM extension)
1311 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1312
1313 case 'f': // Floating-point register
1314 if (!useSoftFloat()) {
1315 if (VT.getSizeInBits() == 64)
1316 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1317 else if (VT.getSizeInBits() == 128)
1318 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1319 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1320 }
1321 break;
1322
1323 case 'v': // Vector register
1324 if (Subtarget.hasVector()) {
1325 if (VT.getSizeInBits() == 32)
1326 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1327 if (VT.getSizeInBits() == 64)
1328 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1329 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1330 }
1331 break;
1332 }
1333 }
1334 if (Constraint.starts_with("{")) {
1335
1336 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1337 // to check the size on.
1338 auto getVTSizeInBits = [&VT]() {
1339 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1340 };
1341
1342 // We need to override the default register parsing for GPRs and FPRs
1343 // because the interpretation depends on VT. The internal names of
1344 // the registers are also different from the external names
1345 // (F0D and F0S instead of F0, etc.).
1346 if (Constraint[1] == 'r') {
1347 if (getVTSizeInBits() == 32)
1348 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1350 if (getVTSizeInBits() == 128)
1351 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1353 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1355 }
1356 if (Constraint[1] == 'f') {
1357 if (useSoftFloat())
1358 return std::make_pair(
1359 0u, static_cast<const TargetRegisterClass *>(nullptr));
1360 if (getVTSizeInBits() == 32)
1361 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1363 if (getVTSizeInBits() == 128)
1364 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1366 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1368 }
1369 if (Constraint[1] == 'v') {
1370 if (!Subtarget.hasVector())
1371 return std::make_pair(
1372 0u, static_cast<const TargetRegisterClass *>(nullptr));
1373 if (getVTSizeInBits() == 32)
1374 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1376 if (getVTSizeInBits() == 64)
1377 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1379 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1381 }
1382 }
1383 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1384}
1385
1386// FIXME? Maybe this could be a TableGen attribute on some registers and
1387// this table could be generated automatically from RegInfo.
1390 const MachineFunction &MF) const {
1391 Register Reg =
1393 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0)
1394 .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0)
1395 .Default(0);
1396
1397 if (Reg)
1398 return Reg;
1399 report_fatal_error("Invalid register name global variable");
1400}
1401
1403 const Constant *PersonalityFn) const {
1404 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1405}
1406
1408 const Constant *PersonalityFn) const {
1409 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1410}
1411
1413 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1414 SelectionDAG &DAG) const {
1415 // Only support length 1 constraints for now.
1416 if (Constraint.size() == 1) {
1417 switch (Constraint[0]) {
1418 case 'I': // Unsigned 8-bit constant
1419 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1420 if (isUInt<8>(C->getZExtValue()))
1421 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1422 Op.getValueType()));
1423 return;
1424
1425 case 'J': // Unsigned 12-bit constant
1426 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1427 if (isUInt<12>(C->getZExtValue()))
1428 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1429 Op.getValueType()));
1430 return;
1431
1432 case 'K': // Signed 16-bit constant
1433 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1434 if (isInt<16>(C->getSExtValue()))
1435 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1436 Op.getValueType()));
1437 return;
1438
1439 case 'L': // Signed 20-bit displacement (on all targets we support)
1440 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1441 if (isInt<20>(C->getSExtValue()))
1442 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1443 Op.getValueType()));
1444 return;
1445
1446 case 'M': // 0x7fffffff
1447 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1448 if (C->getZExtValue() == 0x7fffffff)
1449 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1450 Op.getValueType()));
1451 return;
1452 }
1453 }
1454 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1455}
1456
1457//===----------------------------------------------------------------------===//
1458// Calling conventions
1459//===----------------------------------------------------------------------===//
1460
1461#include "SystemZGenCallingConv.inc"
1462
1464 CallingConv::ID) const {
1465 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1466 SystemZ::R14D, 0 };
1467 return ScratchRegs;
1468}
1469
1471 Type *ToType) const {
1472 return isTruncateFree(FromType, ToType);
1473}
1474
1476 return CI->isTailCall();
1477}
1478
1479// Value is a value that has been passed to us in the location described by VA
1480// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1481// any loads onto Chain.
1483 CCValAssign &VA, SDValue Chain,
1484 SDValue Value) {
1485 // If the argument has been promoted from a smaller type, insert an
1486 // assertion to capture this.
1487 if (VA.getLocInfo() == CCValAssign::SExt)
1489 DAG.getValueType(VA.getValVT()));
1490 else if (VA.getLocInfo() == CCValAssign::ZExt)
1492 DAG.getValueType(VA.getValVT()));
1493
1494 if (VA.isExtInLoc())
1495 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1496 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1497 // If this is a short vector argument loaded from the stack,
1498 // extend from i64 to full vector size and then bitcast.
1499 assert(VA.getLocVT() == MVT::i64);
1500 assert(VA.getValVT().isVector());
1501 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1502 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1503 } else
1504 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1505 return Value;
1506}
1507
1508// Value is a value of type VA.getValVT() that we need to copy into
1509// the location described by VA. Return a copy of Value converted to
1510 // VA.getLocVT(). The caller is responsible for handling indirect values.
1512 CCValAssign &VA, SDValue Value) {
1513 switch (VA.getLocInfo()) {
1514 case CCValAssign::SExt:
1515 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1516 case CCValAssign::ZExt:
1517 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1518 case CCValAssign::AExt:
1519 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1520 case CCValAssign::BCvt: {
1521 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1522 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1523 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1524 // For an f32 vararg we need to first promote it to an f64 and then
1525 // bitcast it to an i64.
1526 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1527 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1528 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1529 ? MVT::v2i64
1530 : VA.getLocVT();
1531 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1532 // For ELF, this is a short vector argument to be stored to the stack,
1533 // bitcast to v2i64 and then extract first element.
1534 if (BitCastToType == MVT::v2i64)
1535 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1536 DAG.getConstant(0, DL, MVT::i32));
1537 return Value;
1538 }
1539 case CCValAssign::Full:
1540 return Value;
1541 default:
1542 llvm_unreachable("Unhandled getLocInfo()");
1543 }
1544}
1545
1547 SDLoc DL(In);
1548 SDValue Lo, Hi;
1549 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1550 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1551 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1552 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1553 DAG.getConstant(64, DL, MVT::i32)));
1554 } else {
1555 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1556 }
1557
1558 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1559 MVT::Untyped, Hi, Lo);
1560 return SDValue(Pair, 0);
1561}
1562
1564 SDLoc DL(In);
1565 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1566 DL, MVT::i64, In);
1567 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1568 DL, MVT::i64, In);
1569
1570 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1571 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1572 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1573 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1574 DAG.getConstant(64, DL, MVT::i32));
1575 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1576 } else {
1577 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1578 }
1579}
1580
1582 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1583 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1584 EVT ValueVT = Val.getValueType();
1585 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1586 // Inline assembly operand.
1587 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1588 return true;
1589 }
1590
1591 return false;
1592}
1593
1595 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1596 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1597 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1598 // Inline assembly operand.
1599 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1600 return DAG.getBitcast(ValueVT, Res);
1601 }
1602
1603 return SDValue();
1604}
1605
1607 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1608 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1609 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1611 MachineFrameInfo &MFI = MF.getFrameInfo();
1613 SystemZMachineFunctionInfo *FuncInfo =
1615 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1616 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1617
1618 // Assign locations to all of the incoming arguments.
1620 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1621 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1622 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
1623
1624 unsigned NumFixedGPRs = 0;
1625 unsigned NumFixedFPRs = 0;
1626 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1627 SDValue ArgValue;
1628 CCValAssign &VA = ArgLocs[I];
1629 EVT LocVT = VA.getLocVT();
1630 if (VA.isRegLoc()) {
1631 // Arguments passed in registers
1632 const TargetRegisterClass *RC;
1633 switch (LocVT.getSimpleVT().SimpleTy) {
1634 default:
1635 // Integers smaller than i64 should be promoted to i64.
1636 llvm_unreachable("Unexpected argument type");
1637 case MVT::i32:
1638 NumFixedGPRs += 1;
1639 RC = &SystemZ::GR32BitRegClass;
1640 break;
1641 case MVT::i64:
1642 NumFixedGPRs += 1;
1643 RC = &SystemZ::GR64BitRegClass;
1644 break;
1645 case MVT::f32:
1646 NumFixedFPRs += 1;
1647 RC = &SystemZ::FP32BitRegClass;
1648 break;
1649 case MVT::f64:
1650 NumFixedFPRs += 1;
1651 RC = &SystemZ::FP64BitRegClass;
1652 break;
1653 case MVT::f128:
1654 NumFixedFPRs += 2;
1655 RC = &SystemZ::FP128BitRegClass;
1656 break;
1657 case MVT::v16i8:
1658 case MVT::v8i16:
1659 case MVT::v4i32:
1660 case MVT::v2i64:
1661 case MVT::v4f32:
1662 case MVT::v2f64:
1663 RC = &SystemZ::VR128BitRegClass;
1664 break;
1665 }
1666
1667 Register VReg = MRI.createVirtualRegister(RC);
1668 MRI.addLiveIn(VA.getLocReg(), VReg);
1669 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1670 } else {
1671 assert(VA.isMemLoc() && "Argument not register or memory");
1672
1673 // Create the frame index object for this incoming parameter.
1674 // FIXME: Pre-include call frame size in the offset, should not
1675 // need to manually add it here.
1676 int64_t ArgSPOffset = VA.getLocMemOffset();
1677 if (Subtarget.isTargetXPLINK64()) {
1678 auto &XPRegs =
1680 ArgSPOffset += XPRegs.getCallFrameSize();
1681 }
1682 int FI =
1683 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1684
1685 // Create the SelectionDAG nodes corresponding to a load
1686 // from this parameter. Unpromoted ints and floats are
1687 // passed as right-justified 8-byte values.
1688 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1689 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1690 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1691 DAG.getIntPtrConstant(4, DL));
1692 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1694 }
1695
1696 // Convert the value of the argument register into the value that's
1697 // being passed.
1698 if (VA.getLocInfo() == CCValAssign::Indirect) {
1699 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1701 // If the original argument was split (e.g. i128), we need
1702 // to load all parts of it here (using the same address).
1703 unsigned ArgIndex = Ins[I].OrigArgIndex;
1704 assert (Ins[I].PartOffset == 0);
1705 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1706 CCValAssign &PartVA = ArgLocs[I + 1];
1707 unsigned PartOffset = Ins[I + 1].PartOffset;
1708 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1709 DAG.getIntPtrConstant(PartOffset, DL));
1710 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1712 ++I;
1713 }
1714 } else
1715 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1716 }
1717
1718 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
1719 // Save the number of non-varargs registers for later use by va_start, etc.
1720 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1721 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1722
1723 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1724 Subtarget.getSpecialRegisters());
1725
1726 // Likewise the address (in the form of a frame index) of where the
1727 // first stack vararg would be. The 1-byte size here is arbitrary.
1728 // FIXME: Pre-include call frame size in the offset, should not
1729 // need to manually add it here.
1730 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
1731 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
1732 FuncInfo->setVarArgsFrameIndex(FI);
1733 }
1734
1735 if (IsVarArg && Subtarget.isTargetELF()) {
1736 // Save the number of non-varargs registers for later use by va_start, etc.
1737 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1738 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1739
1740 // Likewise the address (in the form of a frame index) of where the
1741 // first stack vararg would be. The 1-byte size here is arbitrary.
1742 int64_t VarArgsOffset = CCInfo.getStackSize();
1743 FuncInfo->setVarArgsFrameIndex(
1744 MFI.CreateFixedObject(1, VarArgsOffset, true));
1745
1746 // ...and a similar frame index for the caller-allocated save area
1747 // that will be used to store the incoming registers.
1748 int64_t RegSaveOffset =
1749 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1750 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1751 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1752
1753 // Store the FPR varargs in the reserved frame slots. (We store the
1754 // GPRs as part of the prologue.)
1755 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1757 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1758 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1759 int FI =
1761 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1763 &SystemZ::FP64BitRegClass);
1764 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1765 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1767 }
1768 // Join the stores, which are independent of one another.
1769 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1770 ArrayRef(&MemOps[NumFixedFPRs],
1771 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
1772 }
1773 }
1774
1775 if (Subtarget.isTargetXPLINK64()) {
1776 // Create a virtual register for handling the incoming "ADA" special register (R5).
1777 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
1778 Register ADAvReg = MRI.createVirtualRegister(RC);
1779 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1780 Subtarget.getSpecialRegisters());
1781 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
1782 FuncInfo->setADAVirtualRegister(ADAvReg);
1783 }
1784 return Chain;
1785}
1786
1787static bool canUseSiblingCall(const CCState &ArgCCInfo,
1790 // Punt if there are any indirect or stack arguments, or if the call
1791 // needs the callee-saved argument register R6, or if the call uses
1792 // the callee-saved register arguments SwiftSelf and SwiftError.
1793 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1794 CCValAssign &VA = ArgLocs[I];
1796 return false;
1797 if (!VA.isRegLoc())
1798 return false;
1799 Register Reg = VA.getLocReg();
1800 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1801 return false;
1802 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1803 return false;
1804 }
1805 return true;
1806}
1807
1809 unsigned Offset, bool LoadAdr = false) {
1812 unsigned ADAvReg = MFI->getADAVirtualRegister();
1814
1815 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
1816 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
1817
1818 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
1819 if (!LoadAdr)
1820 Result = DAG.getLoad(
1821 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
1823
1824 return Result;
1825}
1826
1827 // ADA access using a global value.
1828 // Note: for functions, the address of the descriptor is returned.
1830 EVT PtrVT) {
1831 unsigned ADAtype;
1832 bool LoadAddr = false;
1833 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
1834 bool IsFunction =
1835 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
1836 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
1837
1838 if (IsFunction) {
1839 if (IsInternal) {
1841 LoadAddr = true;
1842 } else
1844 } else {
1846 }
1847 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
1848
1849 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
1850}
1851
1852static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
1853 SDLoc &DL, SDValue &Chain) {
1854 unsigned ADADelta = 0; // ADA offset in desc.
1855 unsigned EPADelta = 8; // EPA offset in desc.
1858
1859 // XPLink calling convention.
1860 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1861 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
1862 G->getGlobal()->hasPrivateLinkage());
1863 if (IsInternal) {
1866 unsigned ADAvReg = MFI->getADAVirtualRegister();
1867 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
1868 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1869 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1870 return true;
1871 } else {
1873 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1874 ADA = getADAEntry(DAG, GA, DL, ADADelta);
1875 Callee = getADAEntry(DAG, GA, DL, EPADelta);
1876 }
1877 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1879 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1880 ADA = getADAEntry(DAG, ES, DL, ADADelta);
1881 Callee = getADAEntry(DAG, ES, DL, EPADelta);
1882 } else {
1883 // Function pointer case
1884 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1885 DAG.getConstant(ADADelta, DL, PtrVT));
1886 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
1888 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1889 DAG.getConstant(EPADelta, DL, PtrVT));
1890 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
1892 }
1893 return false;
1894}
1895
1896SDValue
1898 SmallVectorImpl<SDValue> &InVals) const {
1899 SelectionDAG &DAG = CLI.DAG;
1900 SDLoc &DL = CLI.DL;
1902 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1904 SDValue Chain = CLI.Chain;
1905 SDValue Callee = CLI.Callee;
1906 bool &IsTailCall = CLI.IsTailCall;
1907 CallingConv::ID CallConv = CLI.CallConv;
1908 bool IsVarArg = CLI.IsVarArg;
1910 EVT PtrVT = getPointerTy(MF.getDataLayout());
1911 LLVMContext &Ctx = *DAG.getContext();
1913
1914 // FIXME: z/OS support to be added later.
1915 if (Subtarget.isTargetXPLINK64())
1916 IsTailCall = false;
1917
1918 // Analyze the operands of the call, assigning locations to each operand.
1920 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1921 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1922
1923 // We don't support GuaranteedTailCallOpt, only automatically-detected
1924 // sibling calls.
1925 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1926 IsTailCall = false;
1927
1928 // Get a count of how many bytes are to be pushed on the stack.
1929 unsigned NumBytes = ArgCCInfo.getStackSize();
1930
1931 // Mark the start of the call.
1932 if (!IsTailCall)
1933 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1934
1935 // Copy argument values to their designated locations.
1937 SmallVector<SDValue, 8> MemOpChains;
1938 SDValue StackPtr;
1939 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1940 CCValAssign &VA = ArgLocs[I];
1941 SDValue ArgValue = OutVals[I];
1942
1943 if (VA.getLocInfo() == CCValAssign::Indirect) {
1944 // Store the argument in a stack slot and pass its address.
1945 unsigned ArgIndex = Outs[I].OrigArgIndex;
1946 EVT SlotVT;
1947 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1948 // Allocate the full stack space for a promoted (and split) argument.
1949 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1950 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1951 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1952 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1953 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1954 } else {
1955 SlotVT = Outs[I].VT;
1956 }
1957 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1958 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1959 MemOpChains.push_back(
1960 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1962 // If the original argument was split (e.g. i128), we need
1963 // to store all parts of it here (and pass just one address).
1964 assert (Outs[I].PartOffset == 0);
1965 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1966 SDValue PartValue = OutVals[I + 1];
1967 unsigned PartOffset = Outs[I + 1].PartOffset;
1968 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1969 DAG.getIntPtrConstant(PartOffset, DL));
1970 MemOpChains.push_back(
1971 DAG.getStore(Chain, DL, PartValue, Address,
1973 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1974 SlotVT.getStoreSize()) && "Not enough space for argument part!");
1975 ++I;
1976 }
1977 ArgValue = SpillSlot;
1978 } else
1979 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1980
1981 if (VA.isRegLoc()) {
1982 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcast to an
1983 // MVT::i128 type. We decompose the 128-bit type into a pair of its high
1984 // and low values.
1985 if (VA.getLocVT() == MVT::i128)
1986 ArgValue = lowerI128ToGR128(DAG, ArgValue);
1987 // Queue up the argument copies and emit them at the end.
1988 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1989 } else {
1990 assert(VA.isMemLoc() && "Argument not register or memory");
1991
1992 // Work out the address of the stack slot. Unpromoted ints and
1993 // floats are passed as right-justified 8-byte values.
1994 if (!StackPtr.getNode())
1995 StackPtr = DAG.getCopyFromReg(Chain, DL,
1996 Regs->getStackPointerRegister(), PtrVT);
1997 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
1998 VA.getLocMemOffset();
1999 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
2000 Offset += 4;
2001 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2003
2004 // Emit the store.
2005 MemOpChains.push_back(
2006 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2007
2008 // Although long doubles or vectors are passed through the stack when
2009 // they are vararg (non-fixed arguments), if a long double or vector
2010 // occupies the third and fourth slot of the argument list GPR3 should
2011 // still shadow the third slot of the argument list.
2012 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2013 SDValue ShadowArgValue =
2014 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2015 DAG.getIntPtrConstant(1, DL));
2016 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2017 }
2018 }
2019 }
2020
2021 // Join the stores, which are independent of one another.
2022 if (!MemOpChains.empty())
2023 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2024
2025 // Accept direct calls by converting symbolic call addresses to the
2026 // associated Target* opcodes. Force %r1 to be used for indirect
2027 // tail calls.
2028 SDValue Glue;
2029
2030 if (Subtarget.isTargetXPLINK64()) {
2031 SDValue ADA;
2032 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2033 if (!IsBRASL) {
2034 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2035 ->getAddressOfCalleeRegister();
2036 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2037 Glue = Chain.getValue(1);
2038 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2039 }
2040 RegsToPass.push_back(std::make_pair(
2041 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2042 } else {
2043 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2044 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2045 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2046 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2047 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2048 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2049 } else if (IsTailCall) {
2050 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2051 Glue = Chain.getValue(1);
2052 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2053 }
2054 }
2055
2056 // Build a sequence of copy-to-reg nodes, chained and glued together.
2057 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
2058 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
2059 RegsToPass[I].second, Glue);
2060 Glue = Chain.getValue(1);
2061 }
2062
2063 // The first call operand is the chain and the second is the target address.
2064 SmallVector<SDValue, 10> Ops;
2065 Ops.push_back(Chain);
2066 Ops.push_back(Callee);
2067
2068 // Add argument registers to the end of the list so that they are
2069 // known live into the call.
2070 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
2071 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
2072 RegsToPass[I].second.getValueType()));
2073
2074 // Add a register mask operand representing the call-preserved registers.
2075 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2076 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2077 assert(Mask && "Missing call preserved mask for calling convention");
2078 Ops.push_back(DAG.getRegisterMask(Mask));
2079
2080 // Glue the call to the argument copies, if any.
2081 if (Glue.getNode())
2082 Ops.push_back(Glue);
2083
2084 // Emit the call.
2085 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2086 if (IsTailCall) {
2087 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2088 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2089 return Ret;
2090 }
2091 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2092 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2093 Glue = Chain.getValue(1);
2094
2095 // Mark the end of the call, which is glued to the call itself.
2096 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2097 Glue = Chain.getValue(1);
2098
2099 // Assign locations to each value returned by this call.
2100 SmallVector<CCValAssign, 8> RetLocs;
2101 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2102 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2103
2104 // Copy all of the result registers out of their specified physreg.
2105 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2106 CCValAssign &VA = RetLocs[I];
2107
2108 // Copy the value out, gluing the copy to the end of the call sequence.
2109 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2110 VA.getLocVT(), Glue);
2111 Chain = RetValue.getValue(1);
2112 Glue = RetValue.getValue(2);
2113
2114 // Convert the value of the return register into the value that's
2115 // being returned.
2116 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2117 }
2118
2119 return Chain;
2120}
2121
2122// Generate a call taking the given operands as arguments and returning a
2123// result of type RetVT.
2124SDValue SystemZTargetLowering::makeExternalCall(
2125 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2126 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2127 bool DoesNotReturn, bool IsReturnValueUsed) const {
2128 TargetLowering::ArgListTy Args;
2129 Args.reserve(Ops.size());
2130
2131 TargetLowering::ArgListEntry Entry;
2132 for (SDValue Op : Ops) {
2133 Entry.Node = Op;
2134 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
2135 Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2136 Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2137 Args.push_back(Entry);
2138 }
2139
2140 SDValue Callee =
2141 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2142
2143 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2144 TargetLowering::CallLoweringInfo CLI(DAG);
2145 bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
2146 CLI.setDebugLoc(DL)
2147 .setChain(Chain)
2148 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2149 .setNoReturn(DoesNotReturn)
2150 .setDiscardResult(!IsReturnValueUsed)
2151 .setSExtResult(SignExtend)
2152 .setZExtResult(!SignExtend);
2153 return LowerCallTo(CLI);
2154}
2155
2156bool
2157SystemZTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2158 MachineFunction &MF, bool isVarArg,
2159 const SmallVectorImpl<ISD::OutputArg> &Outs,
2160 LLVMContext &Context) const {
2161 // Special case that we cannot easily detect in RetCC_SystemZ since
2162 // i128 may not be a legal type.
2163 for (auto &Out : Outs)
2164 if (Out.ArgVT == MVT::i128)
2165 return false;
2166
2167 SmallVector<CCValAssign, 16> RetLocs;
2168 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
2169 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2170}
2171
2172SDValue
2173SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2174 bool IsVarArg,
2175 const SmallVectorImpl<ISD::OutputArg> &Outs,
2176 const SmallVectorImpl<SDValue> &OutVals,
2177 const SDLoc &DL, SelectionDAG &DAG) const {
2178 MachineFunction &MF = DAG.getMachineFunction();
2179
2180 // Assign locations to each returned value.
2181 SmallVector<CCValAssign, 16> RetLocs;
2182 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2183 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2184
2185 // Quick exit for void returns
2186 if (RetLocs.empty())
2187 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2188
2189 if (CallConv == CallingConv::GHC)
2190 report_fatal_error("GHC functions return void only");
2191
2192 // Copy the result values into the output registers.
2193 SDValue Glue;
2194 SmallVector<SDValue, 4> RetOps;
2195 RetOps.push_back(Chain);
2196 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2197 CCValAssign &VA = RetLocs[I];
2198 SDValue RetValue = OutVals[I];
2199
2200 // Make the return register live on exit.
2201 assert(VA.isRegLoc() && "Can only return in registers!");
2202
2203 // Promote the value as required.
2204 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2205
2206 // Chain and glue the copies together.
2207 Register Reg = VA.getLocReg();
2208 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2209 Glue = Chain.getValue(1);
2210 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2211 }
2212
2213 // Update chain and glue.
2214 RetOps[0] = Chain;
2215 if (Glue.getNode())
2216 RetOps.push_back(Glue);
2217
2218 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2219}
2220
2221// Return true if Op is an intrinsic node with chain that returns the CC value
2222// as its only (other) argument. Provide the associated SystemZISD opcode and
2223// the mask of valid CC values if so.
2224static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2225 unsigned &CCValid) {
2226 unsigned Id = Op.getConstantOperandVal(1);
2227 switch (Id) {
2228 case Intrinsic::s390_tbegin:
2229 Opcode = SystemZISD::TBEGIN;
2230 CCValid = SystemZ::CCMASK_TBEGIN;
2231 return true;
2232
2233 case Intrinsic::s390_tbegin_nofloat:
2234 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2235 CCValid = SystemZ::CCMASK_TBEGIN;
2236 return true;
2237
2238 case Intrinsic::s390_tend:
2239 Opcode = SystemZISD::TEND;
2240 CCValid = SystemZ::CCMASK_TEND;
2241 return true;
2242
2243 default:
2244 return false;
2245 }
2246}
2247
2248// Return true if Op is an intrinsic node without chain that returns the
2249// CC value as its final argument. Provide the associated SystemZISD
2250// opcode and the mask of valid CC values if so.
2251static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2252 unsigned Id = Op.getConstantOperandVal(0);
2253 switch (Id) {
2254 case Intrinsic::s390_vpkshs:
2255 case Intrinsic::s390_vpksfs:
2256 case Intrinsic::s390_vpksgs:
2257 Opcode = SystemZISD::PACKS_CC;
2258 CCValid = SystemZ::CCMASK_VCMP;
2259 return true;
2260
2261 case Intrinsic::s390_vpklshs:
2262 case Intrinsic::s390_vpklsfs:
2263 case Intrinsic::s390_vpklsgs:
2264 Opcode = SystemZISD::PACKLS_CC;
2265 CCValid = SystemZ::CCMASK_VCMP;
2266 return true;
2267
2268 case Intrinsic::s390_vceqbs:
2269 case Intrinsic::s390_vceqhs:
2270 case Intrinsic::s390_vceqfs:
2271 case Intrinsic::s390_vceqgs:
2272 Opcode = SystemZISD::VICMPES;
2273 CCValid = SystemZ::CCMASK_VCMP;
2274 return true;
2275
2276 case Intrinsic::s390_vchbs:
2277 case Intrinsic::s390_vchhs:
2278 case Intrinsic::s390_vchfs:
2279 case Intrinsic::s390_vchgs:
2280 Opcode = SystemZISD::VICMPHS;
2281 CCValid = SystemZ::CCMASK_VCMP;
2282 return true;
2283
2284 case Intrinsic::s390_vchlbs:
2285 case Intrinsic::s390_vchlhs:
2286 case Intrinsic::s390_vchlfs:
2287 case Intrinsic::s390_vchlgs:
2288 Opcode = SystemZISD::VICMPHLS;
2289 CCValid = SystemZ::CCMASK_VCMP;
2290 return true;
2291
2292 case Intrinsic::s390_vtm:
2293 Opcode = SystemZISD::VTM;
2294 CCValid = SystemZ::CCMASK_VCMP;
2295 return true;
2296
2297 case Intrinsic::s390_vfaebs:
2298 case Intrinsic::s390_vfaehs:
2299 case Intrinsic::s390_vfaefs:
2300 Opcode = SystemZISD::VFAE_CC;
2301 CCValid = SystemZ::CCMASK_ANY;
2302 return true;
2303
2304 case Intrinsic::s390_vfaezbs:
2305 case Intrinsic::s390_vfaezhs:
2306 case Intrinsic::s390_vfaezfs:
2307 Opcode = SystemZISD::VFAEZ_CC;
2308 CCValid = SystemZ::CCMASK_ANY;
2309 return true;
2310
2311 case Intrinsic::s390_vfeebs:
2312 case Intrinsic::s390_vfeehs:
2313 case Intrinsic::s390_vfeefs:
2314 Opcode = SystemZISD::VFEE_CC;
2315 CCValid = SystemZ::CCMASK_ANY;
2316 return true;
2317
2318 case Intrinsic::s390_vfeezbs:
2319 case Intrinsic::s390_vfeezhs:
2320 case Intrinsic::s390_vfeezfs:
2321 Opcode = SystemZISD::VFEEZ_CC;
2322 CCValid = SystemZ::CCMASK_ANY;
2323 return true;
2324
2325 case Intrinsic::s390_vfenebs:
2326 case Intrinsic::s390_vfenehs:
2327 case Intrinsic::s390_vfenefs:
2328 Opcode = SystemZISD::VFENE_CC;
2329 CCValid = SystemZ::CCMASK_ANY;
2330 return true;
2331
2332 case Intrinsic::s390_vfenezbs:
2333 case Intrinsic::s390_vfenezhs:
2334 case Intrinsic::s390_vfenezfs:
2335 Opcode = SystemZISD::VFENEZ_CC;
2336 CCValid = SystemZ::CCMASK_ANY;
2337 return true;
2338
2339 case Intrinsic::s390_vistrbs:
2340 case Intrinsic::s390_vistrhs:
2341 case Intrinsic::s390_vistrfs:
2342 Opcode = SystemZISD::VISTR_CC;
2343 CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
2344 return true;
2345
2346 case Intrinsic::s390_vstrcbs:
2347 case Intrinsic::s390_vstrchs:
2348 case Intrinsic::s390_vstrcfs:
2349 Opcode = SystemZISD::VSTRC_CC;
2350 CCValid = SystemZ::CCMASK_ANY;
2351 return true;
2352
2353 case Intrinsic::s390_vstrczbs:
2354 case Intrinsic::s390_vstrczhs:
2355 case Intrinsic::s390_vstrczfs:
2356 Opcode = SystemZISD::VSTRCZ_CC;
2357 CCValid = SystemZ::CCMASK_ANY;
2358 return true;
2359
2360 case Intrinsic::s390_vstrsb:
2361 case Intrinsic::s390_vstrsh:
2362 case Intrinsic::s390_vstrsf:
2363 Opcode = SystemZISD::VSTRS_CC;
2364 CCValid = SystemZ::CCMASK_ANY;
2365 return true;
2366
2367 case Intrinsic::s390_vstrszb:
2368 case Intrinsic::s390_vstrszh:
2369 case Intrinsic::s390_vstrszf:
2370 Opcode = SystemZISD::VSTRSZ_CC;
2371 CCValid = SystemZ::CCMASK_ANY;
2372 return true;
2373
2374 case Intrinsic::s390_vfcedbs:
2375 case Intrinsic::s390_vfcesbs:
2376 Opcode = SystemZISD::VFCMPES;
2377 CCValid = SystemZ::CCMASK_VCMP;
2378 return true;
2379
2380 case Intrinsic::s390_vfchdbs:
2381 case Intrinsic::s390_vfchsbs:
2382 Opcode = SystemZISD::VFCMPHS;
2383 CCValid = SystemZ::CCMASK_VCMP;
2384 return true;
2385
2386 case Intrinsic::s390_vfchedbs:
2387 case Intrinsic::s390_vfchesbs:
2388 Opcode = SystemZISD::VFCMPHES;
2389 CCValid = SystemZ::CCMASK_VCMP;
2390 return true;
2391
2392 case Intrinsic::s390_vftcidb:
2393 case Intrinsic::s390_vftcisb:
2394 Opcode = SystemZISD::VFTCI;
2395 CCValid = SystemZ::CCMASK_VCMP;
2396 return true;
2397
2398 case Intrinsic::s390_tdc:
2399 Opcode = SystemZISD::TDC;
2400 CCValid = SystemZ::CCMASK_TDC;
2401 return true;
2402
2403 default:
2404 return false;
2405 }
2406}
2407
2408// Emit an intrinsic with chain and an explicit CC register result.
2409static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2410 unsigned Opcode) {
2411 // Copy all operands except the intrinsic ID.
2412 unsigned NumOps = Op.getNumOperands();
2413 SmallVector<SDValue, 6> Ops;
2414 Ops.reserve(NumOps - 1);
2415 Ops.push_back(Op.getOperand(0));
2416 for (unsigned I = 2; I < NumOps; ++I)
2417 Ops.push_back(Op.getOperand(I));
2418
2419 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2420 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2421 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2422 SDValue OldChain = SDValue(Op.getNode(), 1);
2423 SDValue NewChain = SDValue(Intr.getNode(), 1);
2424 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2425 return Intr.getNode();
2426}
2427
2428// Emit an intrinsic with an explicit CC register result.
2429static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2430 unsigned Opcode) {
2431 // Copy all operands except the intrinsic ID.
2432 unsigned NumOps = Op.getNumOperands();
2433 SmallVector<SDValue, 6> Ops;
2434 Ops.reserve(NumOps - 1);
2435 for (unsigned I = 1; I < NumOps; ++I)
2436 Ops.push_back(Op.getOperand(I));
2437
2438 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2439 return Intr.getNode();
2440}
2441
2442// CC is a comparison that will be implemented using an integer or
2443// floating-point comparison. Return the condition code mask for
2444// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2445// unsigned comparisons and clear for signed ones. In the floating-point
2446// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2447static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2448#define CONV(X) \
2449 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2450 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2451 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2452
2453 switch (CC) {
2454 default:
2455 llvm_unreachable("Invalid integer condition!");
2456
2457 CONV(EQ);
2458 CONV(NE);
2459 CONV(GT);
2460 CONV(GE);
2461 CONV(LT);
2462 CONV(LE);
2463
2464 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2465 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2466 }
2467#undef CONV
2468}
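// Worked example for CCMaskForCondCode: CONV(GT) expands to three cases, so
// SETGT and SETOGT both map to CCMASK_CMP_GT, while SETUGT maps to
// CCMASK_CMP_UO | CCMASK_CMP_GT, giving the unordered bit the meaning
// described in the comment above.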
2469
2470// If C can be converted to a comparison against zero, adjust the operands
2471// as necessary.
2472static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2473 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2474 return;
2475
2476 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2477 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2478 return;
2479
2480 int64_t Value = ConstOp1->getSExtValue();
2481 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2482 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2483 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2484 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2485 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2486 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2487 }
2488}
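// Worked example for adjustZeroCmp: a signed "x > -1" arrives with
// Value == -1 and CCMASK_CMP_GT; XOR-ing in CCMASK_CMP_EQ turns the mask
// into CCMASK_CMP_GE and Op1 into 0, so the test becomes "x >= 0" and can
// use the compare-with-zero forms.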
2489
2490// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2491// adjust the operands as necessary.
2492static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2493 Comparison &C) {
2494 // For us to make any changes, it must be a comparison between a single-use
2495 // load and a constant.
2496 if (!C.Op0.hasOneUse() ||
2497 C.Op0.getOpcode() != ISD::LOAD ||
2498 C.Op1.getOpcode() != ISD::Constant)
2499 return;
2500
2501 // We must have an 8- or 16-bit load.
2502 auto *Load = cast<LoadSDNode>(C.Op0);
2503 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2504 if ((NumBits != 8 && NumBits != 16) ||
2505 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2506 return;
2507
2508 // The load must be an extending one and the constant must be within the
2509 // range of the unextended value.
2510 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2511 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2512 return;
2513 uint64_t Value = ConstOp1->getZExtValue();
2514 uint64_t Mask = (1 << NumBits) - 1;
2515 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2516 // Make sure that ConstOp1 is in range of C.Op0.
2517 int64_t SignedValue = ConstOp1->getSExtValue();
2518 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2519 return;
2520 if (C.ICmpType != SystemZICMP::SignedOnly) {
2521 // Unsigned comparison between two sign-extended values is equivalent
2522 // to unsigned comparison between two zero-extended values.
2523 Value &= Mask;
2524 } else if (NumBits == 8) {
2525 // Try to treat the comparison as unsigned, so that we can use CLI.
2526 // Adjust CCMask and Value as necessary.
2527 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2528 // Test whether the high bit of the byte is set.
2529 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2530 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2531 // Test whether the high bit of the byte is clear.
2532 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2533 else
2534 // No instruction exists for this combination.
2535 return;
2536 C.ICmpType = SystemZICMP::UnsignedOnly;
2537 }
2538 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2539 if (Value > Mask)
2540 return;
2541 // If the constant is in range, we can use any comparison.
2542 C.ICmpType = SystemZICMP::Any;
2543 } else
2544 return;
2545
2546 // Make sure that the first operand is an i32 of the right extension type.
2547 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2548 ISD::SEXTLOAD :
2549 ISD::ZEXTLOAD);
2550 if (C.Op0.getValueType() != MVT::i32 ||
2551 Load->getExtensionType() != ExtType) {
2552 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2553 Load->getBasePtr(), Load->getPointerInfo(),
2554 Load->getMemoryVT(), Load->getAlign(),
2555 Load->getMemOperand()->getFlags());
2556 // Update the chain uses.
2557 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2558 }
2559
2560 // Make sure that the second operand is an i32 with the right value.
2561 if (C.Op1.getValueType() != MVT::i32 ||
2562 Value != ConstOp1->getZExtValue())
2563 C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2564}
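// Worked example for adjustSubwordCmp: an equality test of a zero-extending
// i8 load against the constant 100 is within Mask == 255, so ICmpType
// becomes SystemZICMP::Any, the load is (re)emitted as an i32 ZEXTLOAD if
// necessary, and the comparison can then use CLI.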
2565
2566// Return true if Op is either an unextended load, or a load suitable
2567// for integer register-memory comparisons of type ICmpType.
2568static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2569 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2570 if (Load) {
2571 // There are no instructions to compare a register with a memory byte.
2572 if (Load->getMemoryVT() == MVT::i8)
2573 return false;
2574 // Otherwise decide on extension type.
2575 switch (Load->getExtensionType()) {
2576 case ISD::NON_EXTLOAD:
2577 return true;
2578 case ISD::SEXTLOAD:
2579 return ICmpType != SystemZICMP::UnsignedOnly;
2580 case ISD::ZEXTLOAD:
2581 return ICmpType != SystemZICMP::SignedOnly;
2582 default:
2583 break;
2584 }
2585 }
2586 return false;
2587}
2588
2589// Return true if it is better to swap the operands of C.
2590static bool shouldSwapCmpOperands(const Comparison &C) {
2591 // Leave i128 and f128 comparisons alone, since they have no memory forms.
2592 if (C.Op0.getValueType() == MVT::i128)
2593 return false;
2594 if (C.Op0.getValueType() == MVT::f128)
2595 return false;
2596
2597 // Always keep a floating-point constant second, since comparisons with
2598 // zero can use LOAD TEST and comparisons with other constants make a
2599 // natural memory operand.
2600 if (isa<ConstantFPSDNode>(C.Op1))
2601 return false;
2602
2603 // Never swap comparisons with zero since there are many ways to optimize
2604 // those later.
2605 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2606 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2607 return false;
2608
2609 // Also keep natural memory operands second if the loaded value is
2610 // only used here. Several comparisons have memory forms.
2611 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2612 return false;
2613
2614 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2615 // In that case we generally prefer the memory to be second.
2616 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2617 // The only exceptions are when the second operand is a constant and
2618 // we can use things like CHHSI.
2619 if (!ConstOp1)
2620 return true;
2621 // The unsigned memory-immediate instructions can handle 16-bit
2622 // unsigned integers.
2623 if (C.ICmpType != SystemZICMP::SignedOnly &&
2624 isUInt<16>(ConstOp1->getZExtValue()))
2625 return false;
2626 // The signed memory-immediate instructions can handle 16-bit
2627 // signed integers.
2628 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2629 isInt<16>(ConstOp1->getSExtValue()))
2630 return false;
2631 return true;
2632 }
2633
2634 // Try to promote the use of CGFR and CLGFR.
2635 unsigned Opcode0 = C.Op0.getOpcode();
2636 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2637 return true;
2638 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2639 return true;
2640 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
2641 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2642 C.Op0.getConstantOperandVal(1) == 0xffffffff)
2643 return true;
2644
2645 return false;
2646}
2647
2648// Check whether C tests for equality between X and Y and whether X - Y
2649// or Y - X is also computed. In that case it's better to compare the
2650// result of the subtraction against zero.
2651static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2652 Comparison &C) {
2653 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2654 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2655 for (SDNode *N : C.Op0->uses()) {
2656 if (N->getOpcode() == ISD::SUB &&
2657 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2658 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2659 // Disable the nsw and nuw flags: the backend needs to handle
2660 // overflow as well during comparison elimination.
2661 SDNodeFlags Flags = N->getFlags();
2662 Flags.setNoSignedWrap(false);
2663 Flags.setNoUnsignedWrap(false);
2664 N->setFlags(Flags);
2665 C.Op0 = SDValue(N, 0);
2666 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2667 return;
2668 }
2669 }
2670 }
2671}
2672
2673// Check whether C compares a floating-point value with zero and if that
2674// floating-point value is also negated. In this case we can use the
2675// negation to set CC, so avoiding separate LOAD AND TEST and
2676// LOAD (NEGATIVE/COMPLEMENT) instructions.
2677static void adjustForFNeg(Comparison &C) {
2678 // This optimization is invalid for strict comparisons, since FNEG
2679 // does not raise any exceptions.
2680 if (C.Chain)
2681 return;
2682 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2683 if (C1 && C1->isZero()) {
2684 for (SDNode *N : C.Op0->uses()) {
2685 if (N->getOpcode() == ISD::FNEG) {
2686 C.Op0 = SDValue(N, 0);
2687 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2688 return;
2689 }
2690 }
2691 }
2692}
2693
2694// Check whether C compares (shl X, 32) with 0 and whether X is
2695// also sign-extended. In that case it is better to test the result
2696// of the sign extension using LTGFR.
2697//
2698// This case is important because InstCombine transforms a comparison
2699// with (sext (trunc X)) into a comparison with (shl X, 32).
2700static void adjustForLTGFR(Comparison &C) {
2701 // Check for a comparison between (shl X, 32) and 0.
2702 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
2703 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
2704 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2705 if (C1 && C1->getZExtValue() == 32) {
2706 SDValue ShlOp0 = C.Op0.getOperand(0);
2707 // See whether X has any SIGN_EXTEND_INREG uses.
2708 for (SDNode *N : ShlOp0->uses()) {
2709 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2710 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2711 C.Op0 = SDValue(N, 0);
2712 return;
2713 }
2714 }
2715 }
2716 }
2717}
2718
2719// If C compares the truncation of an extending load, try to compare
2720// the untruncated value instead. This exposes more opportunities to
2721// reuse CC.
2722static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2723 Comparison &C) {
2724 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2725 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2726 C.Op1.getOpcode() == ISD::Constant &&
2727 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
2728 C.Op1->getAsZExtVal() == 0) {
2729 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2730 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
2731 C.Op0.getValueSizeInBits().getFixedValue()) {
2732 unsigned Type = L->getExtensionType();
2733 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2734 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2735 C.Op0 = C.Op0.getOperand(0);
2736 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2737 }
2738 }
2739 }
2740}
2741
2742// Return true if shift operation N has an in-range constant shift value.
2743// Store it in ShiftVal if so.
2744static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2745 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2746 if (!Shift)
2747 return false;
2748
2749 uint64_t Amount = Shift->getZExtValue();
2750 if (Amount >= N.getValueSizeInBits())
2751 return false;
2752
2753 ShiftVal = Amount;
2754 return true;
2755}
2756
2757// Check whether an AND with Mask is suitable for a TEST UNDER MASK
2758// instruction and whether the CC value is descriptive enough to handle
2759// a comparison of type Opcode between the AND result and CmpVal.
2760// CCMask says which comparison result is being tested and BitSize is
2761// the number of bits in the operands. If TEST UNDER MASK can be used,
2762// return the corresponding CC mask, otherwise return 0.
2763static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2764 uint64_t Mask, uint64_t CmpVal,
2765 unsigned ICmpType) {
2766 assert(Mask != 0 && "ANDs with zero should have been removed by now");
2767
2768 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2769 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2770 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2771 return 0;
2772
2773 // Work out the masks for the lowest and highest bits.
2774 uint64_t High = uint64_t(1) << (63 - llvm::countl_zero(Mask));
2775 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
2776
2777 // Signed ordered comparisons are effectively unsigned if the sign
2778 // bit is dropped.
2779 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2780
2781 // Check for equality comparisons with 0, or the equivalent.
2782 if (CmpVal == 0) {
2783 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2784 return SystemZ::CCMASK_TM_ALL_0;
2785 if (CCMask == SystemZ::CCMASK_CMP_NE)
2786 return SystemZ::CCMASK_TM_SOME_1;
2787 }
2788 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2789 if (CCMask == SystemZ::CCMASK_CMP_LT)
2790 return SystemZ::CCMASK_TM_ALL_0;
2791 if (CCMask == SystemZ::CCMASK_CMP_GE)
2792 return SystemZ::CCMASK_TM_SOME_1;
2793 }
2794 if (EffectivelyUnsigned && CmpVal < Low) {
2795 if (CCMask == SystemZ::CCMASK_CMP_LE)
2796 return SystemZ::CCMASK_TM_ALL_0;
2797 if (CCMask == SystemZ::CCMASK_CMP_GT)
2798 return SystemZ::CCMASK_TM_SOME_1;
2799 }
2800
2801 // Check for equality comparisons with the mask, or the equivalent.
2802 if (CmpVal == Mask) {
2803 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2804 return SystemZ::CCMASK_TM_ALL_1;
2805 if (CCMask == SystemZ::CCMASK_CMP_NE)
2806 return SystemZ::CCMASK_TM_SOME_0;
2807 }
2808 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2809 if (CCMask == SystemZ::CCMASK_CMP_GT)
2810 return SystemZ::CCMASK_TM_ALL_1;
2811 if (CCMask == SystemZ::CCMASK_CMP_LE)
2812 return SystemZ::CCMASK_TM_SOME_0;
2813 }
2814 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2815 if (CCMask == SystemZ::CCMASK_CMP_GE)
2816 return SystemZ::CCMASK_TM_ALL_1;
2817 if (CCMask == SystemZ::CCMASK_CMP_LT)
2818 return SystemZ::CCMASK_TM_SOME_0;
2819 }
2820
2821 // Check for ordered comparisons with the top bit.
2822 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2823 if (CCMask == SystemZ::CCMASK_CMP_LE)
2824 return SystemZ::CCMASK_TM_MSB_0;
2825 if (CCMask == SystemZ::CCMASK_CMP_GT)
2826 return SystemZ::CCMASK_TM_MSB_1;
2827 }
2828 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2829 if (CCMask == SystemZ::CCMASK_CMP_LT)
2830 return SystemZ::CCMASK_TM_MSB_0;
2831 if (CCMask == SystemZ::CCMASK_CMP_GE)
2832 return SystemZ::CCMASK_TM_MSB_1;
2833 }
2834
2835 // If there are just two bits, we can do equality checks for Low and High
2836 // as well.
2837 if (Mask == Low + High) {
2838 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2839 return SystemZ::CCMASK_TM_MIXED_MSB_0;
2840 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2841 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2842 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2843 return SystemZ::CCMASK_TM_MIXED_MSB_1;
2844 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2845 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2846 }
2847
2848 // Looks like we've exhausted our options.
2849 return 0;
2850}
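// Worked example for getTestUnderMaskCond: testing "(x & 0x30) == 0" has
// Mask == 0x30 and CmpVal == 0 with CCMASK_CMP_EQ, which the first block
// above maps to CCMASK_TM_ALL_0, i.e. TEST UNDER MASK reports that all of
// the selected bits are zero.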
2851
2852// See whether C can be implemented as a TEST UNDER MASK instruction.
2853// Update the arguments with the TM version if so.
2854static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2855 Comparison &C) {
2856 // Use VECTOR TEST UNDER MASK for i128 operations.
2857 if (C.Op0.getValueType() == MVT::i128) {
2858 // We can use VTM for EQ/NE comparisons of x & y against 0.
2859 if (C.Op0.getOpcode() == ISD::AND &&
2860 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2861 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
2862 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
2863 if (Mask && Mask->getAPIntValue() == 0) {
2864 C.Opcode = SystemZISD::VTM;
2865 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
2866 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
2867 C.CCValid = SystemZ::CCMASK_VCMP;
2868 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2869 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2870 else
2871 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2872 }
2873 }
2874 return;
2875 }
2876
2877 // Check that we have a comparison with a constant.
2878 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2879 if (!ConstOp1)
2880 return;
2881 uint64_t CmpVal = ConstOp1->getZExtValue();
2882
2883 // Check whether the nonconstant input is an AND with a constant mask.
2884 Comparison NewC(C);
2885 uint64_t MaskVal;
2886 ConstantSDNode *Mask = nullptr;
2887 if (C.Op0.getOpcode() == ISD::AND) {
2888 NewC.Op0 = C.Op0.getOperand(0);
2889 NewC.Op1 = C.Op0.getOperand(1);
2890 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2891 if (!Mask)
2892 return;
2893 MaskVal = Mask->getZExtValue();
2894 } else {
2895 // There is no instruction to compare with a 64-bit immediate
2896 // so use TMHH instead if possible. We need an unsigned ordered
2897 // comparison with an i64 immediate.
2898 if (NewC.Op0.getValueType() != MVT::i64 ||
2899 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2900 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2901 NewC.ICmpType == SystemZICMP::SignedOnly)
2902 return;
2903 // Convert LE and GT comparisons into LT and GE.
2904 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2905 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2906 if (CmpVal == uint64_t(-1))
2907 return;
2908 CmpVal += 1;
2909 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2910 }
2911 // If the low N bits of Op1 are zero then the low N bits of Op0 can
2912 // be masked off without changing the result.
2913 MaskVal = -(CmpVal & -CmpVal);
2914 NewC.ICmpType = SystemZICMP::UnsignedOnly;
2915 }
2916 if (!MaskVal)
2917 return;
2918
2919 // Check whether the combination of mask, comparison value and comparison
2920 // type are suitable.
2921 unsigned BitSize = NewC.Op0.getValueSizeInBits();
2922 unsigned NewCCMask, ShiftVal;
2923 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2924 NewC.Op0.getOpcode() == ISD::SHL &&
2925 isSimpleShift(NewC.Op0, ShiftVal) &&
2926 (MaskVal >> ShiftVal != 0) &&
2927 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2928 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2929 MaskVal >> ShiftVal,
2930 CmpVal >> ShiftVal,
2931 SystemZICMP::Any))) {
2932 NewC.Op0 = NewC.Op0.getOperand(0);
2933 MaskVal >>= ShiftVal;
2934 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2935 NewC.Op0.getOpcode() == ISD::SRL &&
2936 isSimpleShift(NewC.Op0, ShiftVal) &&
2937 (MaskVal << ShiftVal != 0) &&
2938 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2939 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2940 MaskVal << ShiftVal,
2941 CmpVal << ShiftVal,
2942 SystemZICMP::UnsignedOnly))) {
2943 NewC.Op0 = NewC.Op0.getOperand(0);
2944 MaskVal <<= ShiftVal;
2945 } else {
2946 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2947 NewC.ICmpType);
2948 if (!NewCCMask)
2949 return;
2950 }
2951
2952 // Go ahead and make the change.
2953 C.Opcode = SystemZISD::TM;
2954 C.Op0 = NewC.Op0;
2955 if (Mask && Mask->getZExtValue() == MaskVal)
2956 C.Op1 = SDValue(Mask, 0);
2957 else
2958 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2959 C.CCValid = SystemZ::CCMASK_TM;
2960 C.CCMask = NewCCMask;
2961}
2962
2963// Implement i128 comparison in vector registers.
2964static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
2965 Comparison &C) {
2966 if (C.Opcode != SystemZISD::ICMP)
2967 return;
2968 if (C.Op0.getValueType() != MVT::i128)
2969 return;
2970
2971 // (In-)Equality comparisons can be implemented via VCEQGS.
2972 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2973 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2974 C.Opcode = SystemZISD::VICMPES;
2975 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
2976 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
2977 C.CCValid = SystemZ::CCMASK_VCMP;
2978 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2979 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2980 else
2981 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2982 return;
2983 }
2984
2985 // Normalize other comparisons to GT.
2986 bool Swap = false, Invert = false;
2987 switch (C.CCMask) {
2988 case SystemZ::CCMASK_CMP_GT: break;
2989 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
2990 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
2991 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
2992 default: llvm_unreachable("Invalid integer condition!");
2993 }
2994 if (Swap)
2995 std::swap(C.Op0, C.Op1);
2996
2997 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2998 C.Opcode = SystemZISD::UCMP128HI;
2999 else
3000 C.Opcode = SystemZISD::SCMP128HI;
3001 C.CCValid = SystemZ::CCMASK_ANY;
3002 C.CCMask = SystemZ::CCMASK_1;
3003
3004 if (Invert)
3005 C.CCMask ^= C.CCValid;
3006}
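// Worked example for adjustICmp128: a signed i128 "a < b" is normalized by
// swapping the operands (so the test reads "b > a"), the opcode becomes
// SCMP128HI, and the condition is tested as CC 1 (CCMASK_1); the LE and GE
// cases additionally invert the resulting mask.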
3007
3008// See whether the comparison argument contains a redundant AND
3009// and remove it if so. This sometimes happens due to the generic
3010// BRCOND expansion.
3011static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
3012 Comparison &C) {
3013 if (C.Op0.getOpcode() != ISD::AND)
3014 return;
3015 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3016 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3017 return;
3018 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3019 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3020 return;
3021
3022 C.Op0 = C.Op0.getOperand(0);
3023}
3024
3025// Return a Comparison that tests the condition-code result of intrinsic
3026// node Call against constant integer CC using comparison code Cond.
3027// Opcode is the opcode of the SystemZISD operation for the intrinsic
3028// and CCValid is the set of possible condition-code results.
3029static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3030 SDValue Call, unsigned CCValid, uint64_t CC,
3031 ISD::CondCode Cond) {
3032 Comparison C(Call, SDValue(), SDValue());
3033 C.Opcode = Opcode;
3034 C.CCValid = CCValid;
3035 if (Cond == ISD::SETEQ)
3036 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3037 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3038 else if (Cond == ISD::SETNE)
3039 // ...and the inverse of that.
3040 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3041 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3042 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3043 // always true for CC>3.
3044 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3045 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3046 // ...and the inverse of that.
3047 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3048 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3049 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3050 // always true for CC>3.
3051 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3052 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3053 // ...and the inverse of that.
3054 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3055 else
3056 llvm_unreachable("Unexpected integer comparison type");
3057 C.CCMask &= CCValid;
3058 return C;
3059}
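// Worked example for getIntrinsicCmp: with Cond == SETEQ and CC == 2 the
// mask is 1 << (3 - 2), i.e. only the bit for condition code 2; with
// Cond == SETULT and CC == 2 the mask is ~0U << (4 - 2), covering condition
// codes 0 and 1, before being restricted to CCValid.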
3060
3061// Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3062static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3063 ISD::CondCode Cond, const SDLoc &DL,
3064 SDValue Chain = SDValue(),
3065 bool IsSignaling = false) {
3066 if (CmpOp1.getOpcode() == ISD::Constant) {
3067 assert(!Chain);
3068 unsigned Opcode, CCValid;
3069 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3070 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3071 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3072 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3073 CmpOp1->getAsZExtVal(), Cond);
3074 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3075 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3076 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3077 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3078 CmpOp1->getAsZExtVal(), Cond);
3079 }
3080 Comparison C(CmpOp0, CmpOp1, Chain);
3081 C.CCMask = CCMaskForCondCode(Cond);
3082 if (C.Op0.getValueType().isFloatingPoint()) {
3083 C.CCValid = SystemZ::CCMASK_FCMP;
3084 if (!C.Chain)
3085 C.Opcode = SystemZISD::FCMP;
3086 else if (!IsSignaling)
3087 C.Opcode = SystemZISD::STRICT_FCMP;
3088 else
3089 C.Opcode = SystemZISD::STRICT_FCMPS;
3090 adjustForFNeg(C);
3091 } else {
3092 assert(!C.Chain);
3093 C.CCValid = SystemZ::CCMASK_ICMP;
3094 C.Opcode = SystemZISD::ICMP;
3095 // Choose the type of comparison. Equality and inequality tests can
3096 // use either signed or unsigned comparisons. The choice also doesn't
3097 // matter if both sign bits are known to be clear. In those cases we
3098 // want to give the main isel code the freedom to choose whichever
3099 // form fits best.
3100 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3101 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3102 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3103 C.ICmpType = SystemZICMP::Any;
3104 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3105 C.ICmpType = SystemZICMP::UnsignedOnly;
3106 else
3107 C.ICmpType = SystemZICMP::SignedOnly;
3108 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3109 adjustForRedundantAnd(DAG, DL, C);
3110 adjustZeroCmp(DAG, DL, C);
3111 adjustSubwordCmp(DAG, DL, C);
3112 adjustForSubtraction(DAG, DL, C);
3113 adjustForLTGFR(C);
3114 adjustICmpTruncate(DAG, DL, C);
3115 }
3116
3117 if (shouldSwapCmpOperands(C)) {
3118 std::swap(C.Op0, C.Op1);
3119 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3120 }
3121
3122 adjustForTestUnderMask(DAG, DL, C);
3123 adjustICmp128(DAG, DL, C);
3124 return C;
3125}
3126
3127// Emit the comparison instruction described by C.
3128static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3129 if (!C.Op1.getNode()) {
3130 SDNode *Node;
3131 switch (C.Op0.getOpcode()) {
3132 case ISD::INTRINSIC_W_CHAIN:
3133 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3134 return SDValue(Node, 0);
3135 case ISD::INTRINSIC_WO_CHAIN:
3136 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3137 return SDValue(Node, Node->getNumValues() - 1);
3138 default:
3139 llvm_unreachable("Invalid comparison operands");
3140 }
3141 }
3142 if (C.Opcode == SystemZISD::ICMP)
3143 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3144 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3145 if (C.Opcode == SystemZISD::TM) {
3146 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3147 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
3148 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3149 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3150 }
3151 if (C.Opcode == SystemZISD::VICMPES) {
3152 SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32);
3153 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3154 return SDValue(Val.getNode(), 1);
3155 }
3156 if (C.Chain) {
3157 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3158 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3159 }
3160 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3161}
3162
3163// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3164// 64 bits. Extend is the extension type to use. Store the high part
3165// in Hi and the low part in Lo.
3166static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3167 SDValue Op0, SDValue Op1, SDValue &Hi,
3168 SDValue &Lo) {
3169 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3170 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3171 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3172 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3173 DAG.getConstant(32, DL, MVT::i64));
3174 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3175 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3176}
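// For example, with Extend == ISD::ZERO_EXTEND the code above turns a
// 32-bit umul_lohi into a single 64-bit multiply whose upper 32 bits are
// returned in Hi and whose lower 32 bits are returned in Lo.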
3177
3178// Lower a binary operation that produces two VT results, one in each
3179// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3180// and Opcode performs the GR128 operation. Store the even register result
3181// in Even and the odd register result in Odd.
3182static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3183 unsigned Opcode, SDValue Op0, SDValue Op1,
3184 SDValue &Even, SDValue &Odd) {
3185 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3186 bool Is32Bit = is32Bit(VT);
3187 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3188 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3189}
3190
3191// Return an i32 value that is 1 if the CC value produced by CCReg is
3192// in the mask CCMask and 0 otherwise. CC is known to have a value
3193// in CCValid, so other values can be ignored.
3194static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3195 unsigned CCValid, unsigned CCMask) {
3196 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3197 DAG.getConstant(0, DL, MVT::i32),
3198 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3199 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3200 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3201}
3202
3203// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3204// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3205// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3206// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3207// floating-point comparisons.
3208enum class CmpMode { Int, FP, StrictFP, SignalingFP };
3209static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
3210 switch (CC) {
3211 case ISD::SETOEQ:
3212 case ISD::SETEQ:
3213 switch (Mode) {
3214 case CmpMode::Int: return SystemZISD::VICMPE;
3215 case CmpMode::FP: return SystemZISD::VFCMPE;
3216 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3217 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3218 }
3219 llvm_unreachable("Bad mode");
3220
3221 case ISD::SETOGE:
3222 case ISD::SETGE:
3223 switch (Mode) {
3224 case CmpMode::Int: return 0;
3225 case CmpMode::FP: return SystemZISD::VFCMPHE;
3226 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3227 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3228 }
3229 llvm_unreachable("Bad mode");
3230
3231 case ISD::SETOGT:
3232 case ISD::SETGT:
3233 switch (Mode) {
3234 case CmpMode::Int: return SystemZISD::VICMPH;
3235 case CmpMode::FP: return SystemZISD::VFCMPH;
3236 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3237 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3238 }
3239 llvm_unreachable("Bad mode");
3240
3241 case ISD::SETUGT:
3242 switch (Mode) {
3243 case CmpMode::Int: return SystemZISD::VICMPHL;
3244 case CmpMode::FP: return 0;
3245 case CmpMode::StrictFP: return 0;
3246 case CmpMode::SignalingFP: return 0;
3247 }
3248 llvm_unreachable("Bad mode");
3249
3250 default:
3251 return 0;
3252 }
3253}
3254
3255// Return the SystemZISD vector comparison operation for CC or its inverse,
3256// or 0 if neither can be done directly. Indicate in Invert whether the
3257// result is for the inverse of CC. Mode is as above.
3258static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3259 bool &Invert) {
3260 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3261 Invert = false;
3262 return Opcode;
3263 }
3264
3265 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3266 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3267 Invert = true;
3268 return Opcode;
3269 }
3270
3271 return 0;
3272}
3273
3274// Return a v2f64 that contains the extended form of elements Start and Start+1
3275// of v4f32 value Op. If Chain is nonnull, return the strict form.
3276static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3277 SDValue Op, SDValue Chain) {
3278 int Mask[] = { Start, -1, Start + 1, -1 };
3279 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3280 if (Chain) {
3281 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3282 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3283 }
3284 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3285}
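// For example, Start == 2 builds the shuffle mask { 2, -1, 3, -1 }, placing
// elements 2 and 3 of the v4f32 input in the even lanes, which the
// (STRICT_)VEXTEND node then widens into the two f64 elements of the result.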
3286
3287// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3288// producing a result of type VT. If Chain is nonnull, return the strict form.
3289SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3290 const SDLoc &DL, EVT VT,
3291 SDValue CmpOp0,
3292 SDValue CmpOp1,
3293 SDValue Chain) const {
3294 // There is no hardware support for v4f32 (unless we have the vector
3295 // enhancements facility 1), so extend the vector into two v2f64s
3296 // and compare those.
3297 if (CmpOp0.getValueType() == MVT::v4f32 &&
3298 !Subtarget.hasVectorEnhancements1()) {
3299 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3300 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3301 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3302 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3303 if (Chain) {
3304 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3305 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3306 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3307 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3308 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3309 H1.getValue(1), L1.getValue(1),
3310 HRes.getValue(1), LRes.getValue(1) };
3311 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3312 SDValue Ops[2] = { Res, NewChain };
3313 return DAG.getMergeValues(Ops, DL);
3314 }
3315 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3316 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3317 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3318 }
3319 if (Chain) {
3320 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3321 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3322 }
3323 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3324}
3325
3326// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3327// an integer mask of type VT. If Chain is nonnull, we have a strict
3328// floating-point comparison. If in addition IsSignaling is true, we have
3329// a strict signaling floating-point comparison.
3330SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3331 const SDLoc &DL, EVT VT,
3332 ISD::CondCode CC,
3333 SDValue CmpOp0,
3334 SDValue CmpOp1,
3335 SDValue Chain,
3336 bool IsSignaling) const {
3337 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3338 assert (!Chain || IsFP);
3339 assert (!IsSignaling || Chain);
3340 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3341 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3342 bool Invert = false;
3343 SDValue Cmp;
3344 switch (CC) {
3345 // Handle tests for order using (or (ogt y x) (oge x y)).
3346 case ISD::SETUO:
3347 Invert = true;
3348 [[fallthrough]];
3349 case ISD::SETO: {
3350 assert(IsFP && "Unexpected integer comparison");
3351 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3352 DL, VT, CmpOp1, CmpOp0, Chain);
3353 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3354 DL, VT, CmpOp0, CmpOp1, Chain);
3355 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3356 if (Chain)
3357 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3358 LT.getValue(1), GE.getValue(1));
3359 break;
3360 }
3361
3362 // Handle <> tests using (or (ogt y x) (ogt x y)).
3363 case ISD::SETUEQ:
3364 Invert = true;
3365 [[fallthrough]];
3366 case ISD::SETONE: {
3367 assert(IsFP && "Unexpected integer comparison");
3368 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3369 DL, VT, CmpOp1, CmpOp0, Chain);
3370 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3371 DL, VT, CmpOp0, CmpOp1, Chain);
3372 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3373 if (Chain)
3374 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3375 LT.getValue(1), GT.getValue(1));
3376 break;
3377 }
3378
3379 // Otherwise a single comparison is enough. It doesn't really
3380 // matter whether we try the inversion or the swap first, since
3381 // there are no cases where both work.
3382 default:
3383 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3384 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3385 else {
3386 CC = ISD::getSetCCSwappedOperands(CC);
3387 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3388 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3389 else
3390 llvm_unreachable("Unhandled comparison");
3391 }
3392 if (Chain)
3393 Chain = Cmp.getValue(1);
3394 break;
3395 }
3396 if (Invert) {
3397 SDValue Mask =
3398 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3399 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3400 }
3401 if (Chain && Chain.getNode() != Cmp.getNode()) {
3402 SDValue Ops[2] = { Cmp, Chain };
3403 Cmp = DAG.getMergeValues(Ops, DL);
3404 }
3405 return Cmp;
3406}
3407
3408SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3409 SelectionDAG &DAG) const {
3410 SDValue CmpOp0 = Op.getOperand(0);
3411 SDValue CmpOp1 = Op.getOperand(1);
3412 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3413 SDLoc DL(Op);
3414 EVT VT = Op.getValueType();
3415 if (VT.isVector())
3416 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3417
3418 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3419 SDValue CCReg = emitCmp(DAG, DL, C);
3420 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3421}
3422
3423SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3424 SelectionDAG &DAG,
3425 bool IsSignaling) const {
3426 SDValue Chain = Op.getOperand(0);
3427 SDValue CmpOp0 = Op.getOperand(1);
3428 SDValue CmpOp1 = Op.getOperand(2);
3429 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3430 SDLoc DL(Op);
3431 EVT VT = Op.getNode()->getValueType(0);
3432 if (VT.isVector()) {
3433 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3434 Chain, IsSignaling);
3435 return Res.getValue(Op.getResNo());
3436 }
3437
3438 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3439 SDValue CCReg = emitCmp(DAG, DL, C);
3440 CCReg->setFlags(Op->getFlags());
3441 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3442 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3443 return DAG.getMergeValues(Ops, DL);
3444}
3445
3446SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3447 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3448 SDValue CmpOp0 = Op.getOperand(2);
3449 SDValue CmpOp1 = Op.getOperand(3);
3450 SDValue Dest = Op.getOperand(4);
3451 SDLoc DL(Op);
3452
3453 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3454 SDValue CCReg = emitCmp(DAG, DL, C);
3455 return DAG.getNode(
3456 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3457 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3458 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3459}
3460
3461// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3462// allowing Pos and Neg to be wider than CmpOp.
3463static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3464 return (Neg.getOpcode() == ISD::SUB &&
3465 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3466 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3467 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3468 Pos.getOperand(0) == CmpOp)));
3469}
3470
3471// Return the absolute or negative absolute of Op; IsNegative decides which.
3472static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3473 bool IsNegative) {
3474 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3475 if (IsNegative)
3476 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3477 DAG.getConstant(0, DL, Op.getValueType()), Op);
3478 return Op;
3479}
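// Worked example (used by lowerSELECT_CC below): select_cc(x, 0, x, 0 - x,
// setgt), i.e. "x > 0 ? x : -x", is recognized via isAbsolute and lowered
// here to ISD::ABS; the inverted selection produces 0 - abs(x) instead.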
3480
3481SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3482 SelectionDAG &DAG) const {
3483 SDValue CmpOp0 = Op.getOperand(0);
3484 SDValue CmpOp1 = Op.getOperand(1);
3485 SDValue TrueOp = Op.getOperand(2);
3486 SDValue FalseOp = Op.getOperand(3);
3487 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3488 SDLoc DL(Op);
3489
3490 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3491
3492 // Check for absolute and negative-absolute selections, including those
3493 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3494 // This check supplements the one in DAGCombiner.
3495 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3496 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3497 C.Op1.getOpcode() == ISD::Constant &&
3498 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3499 C.Op1->getAsZExtVal() == 0) {
3500 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3501 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3502 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3503 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3504 }
3505
3506 SDValue CCReg = emitCmp(DAG, DL, C);
3507 SDValue Ops[] = {TrueOp, FalseOp,
3508 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3509 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3510
3511 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3512}
3513
3514SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3515 SelectionDAG &DAG) const {
3516 SDLoc DL(Node);
3517 const GlobalValue *GV = Node->getGlobal();
3518 int64_t Offset = Node->getOffset();
3519 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3520 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3521
3522 SDValue Result;
3523 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3524 if (isInt<32>(Offset)) {
3525 // Assign anchors at 1<<12 byte boundaries.
3526 uint64_t Anchor = Offset & ~uint64_t(0xfff);
3527 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3528 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3529
3530 // The offset can be folded into the address if it is aligned to a
3531 // halfword.
3532 Offset -= Anchor;
3533 if (Offset != 0 && (Offset & 1) == 0) {
3534 SDValue Full =
3535 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3536 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3537 Offset = 0;
3538 }
3539 } else {
3540 // Conservatively load a constant offset greater than 32 bits into a
3541 // register below.
3542 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3543 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3544 }
3545 } else if (Subtarget.isTargetELF()) {
3546 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3547 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3548 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3549 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3550 } else if (Subtarget.isTargetzOS()) {
3551 Result = getADAEntry(DAG, GV, DL, PtrVT);
3552 } else
3553 llvm_unreachable("Unexpected Subtarget");
3554
3555 // If there was a non-zero offset that we didn't fold, create an explicit
3556 // addition for it.
3557 if (Offset != 0)
3558 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3559 DAG.getConstant(Offset, DL, PtrVT));
3560
3561 return Result;
3562}
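// Worked example for lowerGlobalAddress: a PC32DBL-addressable global with
// offset 0x1236 gets the anchor 0x1000 (a 1 << 12 boundary) wrapped in
// PCREL_WRAPPER, and the remaining 0x236 is folded via PCREL_OFFSET because
// it is halfword aligned.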
3563
3564SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3565 SelectionDAG &DAG,
3566 unsigned Opcode,
3567 SDValue GOTOffset) const {
3568 SDLoc DL(Node);
3569 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3570 SDValue Chain = DAG.getEntryNode();
3571 SDValue Glue;
3572
3573 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3574 CallingConv::GHC)
3575 report_fatal_error("In GHC calling convention TLS is not supported");
3576
3577 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3578 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3579 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3580 Glue = Chain.getValue(1);
3581 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3582 Glue = Chain.getValue(1);
3583
3584 // The first call operand is the chain and the second is the TLS symbol.
3585 SmallVector<SDValue, 8> Ops;
3586 Ops.push_back(Chain);
3587 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3588 Node->getValueType(0),
3589 0, 0));
3590
3591 // Add argument registers to the end of the list so that they are
3592 // known live into the call.
3593 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3594 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3595
3596 // Add a register mask operand representing the call-preserved registers.
3597 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3598 const uint32_t *Mask =
3599 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3600 assert(Mask && "Missing call preserved mask for calling convention");
3601 Ops.push_back(DAG.getRegisterMask(Mask));
3602
3603 // Glue the call to the argument copies.
3604 Ops.push_back(Glue);
3605
3606 // Emit the call.
3607 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3608 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3609 Glue = Chain.getValue(1);
3610
3611 // Copy the return value from %r2.
3612 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3613}
3614
3615SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3616 SelectionDAG &DAG) const {
3617 SDValue Chain = DAG.getEntryNode();
3618 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3619
3620 // The high part of the thread pointer is in access register 0.
3621 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3622 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3623
3624 // The low part of the thread pointer is in access register 1.
3625 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3626 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3627
3628 // Merge them into a single 64-bit address.
3629 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3630 DAG.getConstant(32, DL, PtrVT));
3631 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3632}
3633
3634SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3635 SelectionDAG &DAG) const {
3636 if (DAG.getTarget().useEmulatedTLS())
3637 return LowerToTLSEmulatedModel(Node, DAG);
3638 SDLoc DL(Node);
3639 const GlobalValue *GV = Node->getGlobal();
3640 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3641 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3642
3643 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3644 CallingConv::GHC)
3645 report_fatal_error("In GHC calling convention TLS is not supported");
3646
3647 SDValue TP = lowerThreadPointer(DL, DAG);
3648
3649 // Get the offset of GA from the thread pointer, based on the TLS model.
3650 SDValue Offset;
3651 switch (model) {
3652 case TLSModel::GeneralDynamic: {
3653 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3654 SystemZConstantPoolValue *CPV =
3655 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3656
3657 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3658 Offset = DAG.getLoad(
3659 PtrVT, DL, DAG.getEntryNode(), Offset,
3660 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3661
3662 // Call __tls_get_offset to retrieve the offset.
3663 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3664 break;
3665 }
3666
3667 case TLSModel::LocalDynamic: {
3668 // Load the GOT offset of the module ID.
3669 SystemZConstantPoolValue *CPV =
3670 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3671
3672 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3673 Offset = DAG.getLoad(
3674 PtrVT, DL, DAG.getEntryNode(), Offset,
3675 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3676
3677 // Call __tls_get_offset to retrieve the module base offset.
3678 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3679
3680 // Note: The SystemZLDCleanupPass will remove redundant computations
3681 // of the module base offset. Count total number of local-dynamic
3682 // accesses to trigger execution of that pass.
3683 SystemZMachineFunctionInfo* MFI =
3684 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3685 MFI->incNumLocalDynamicTLSAccesses();
3686
3687 // Add the per-symbol offset.
3688 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3689
3690 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3691 DTPOffset = DAG.getLoad(
3692 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3693 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3694
3695 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3696 break;
3697 }
3698
3699 case TLSModel::InitialExec: {
3700 // Load the offset from the GOT.
3701 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3702 SystemZII::MO_INDNTPOFF);
3703 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3704 Offset =
3705 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3706 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3707 break;
3708 }
3709
3710 case TLSModel::LocalExec: {
3711 // Force the offset into the constant pool and load it from there.
3712 SystemZConstantPoolValue *CPV =
3713 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3714
3715 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3716 Offset = DAG.getLoad(
3717 PtrVT, DL, DAG.getEntryNode(), Offset,
3718 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3719 break;
3720 }
3721 }
3722
3723 // Add the base and offset together.
3724 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3725}
3726
3727SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3728 SelectionDAG &DAG) const {
3729 SDLoc DL(Node);
3730 const BlockAddress *BA = Node->getBlockAddress();
3731 int64_t Offset = Node->getOffset();
3732 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3733
3734 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3735 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3736 return Result;
3737}
3738
3739SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3740 SelectionDAG &DAG) const {
3741 SDLoc DL(JT);
3742 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3743 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3744
3745 // Use LARL to load the address of the table.
3746 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3747}
3748
3749SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3750 SelectionDAG &DAG) const {
3751 SDLoc DL(CP);
3752 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3753
3754 SDValue Result;
3755 if (CP->isMachineConstantPoolEntry())
3756 Result =
3757 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3758 else
3759 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3760 CP->getOffset());
3761
3762 // Use LARL to load the address of the constant pool entry.
3763 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3764}
3765
3766SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3767 SelectionDAG &DAG) const {
3768 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
3769 MachineFunction &MF = DAG.getMachineFunction();
3770 MachineFrameInfo &MFI = MF.getFrameInfo();
3771 MFI.setFrameAddressIsTaken(true);
3772
3773 SDLoc DL(Op);
3774 unsigned Depth = Op.getConstantOperandVal(0);
3775 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3776
3777 // By definition, the frame address is the address of the back chain. (In
3778 // the case of packed stack without backchain, return the address where the
3779 // backchain would have been stored. This will either be an unused space or
3780 // contain a saved register).
3781 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3782 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3783
3784 if (Depth > 0) {
3785 // FIXME The frontend should detect this case.
3786 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3787 report_fatal_error("Unsupported stack frame traversal count");
3788
3789 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
3790 while (Depth--) {
3791 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
3792 MachinePointerInfo());
3793 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
3794 }
3795 }
3796
3797 return BackChain;
3798}
3799
3800SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3801 SelectionDAG &DAG) const {
3802 MachineFunction &MF = DAG.getMachineFunction();
3803 MachineFrameInfo &MFI = MF.getFrameInfo();
3804 MFI.setReturnAddressIsTaken(true);
3805
3806 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3807 return SDValue();
3808
3809 SDLoc DL(Op);
3810 unsigned Depth = Op.getConstantOperandVal(0);
3811 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3812
3813 if (Depth > 0) {
3814 // FIXME The frontend should detect this case.
3815 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3816 report_fatal_error("Unsupported stack frame traversal count");
3817
3818 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3819 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
3820 int Offset = (TFL->usePackedStack(MF) ? -2 : 14) *
3821 getTargetMachine().getPointerSize(0);
3822 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
3823 DAG.getConstant(Offset, DL, PtrVT));
3824 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
3825 MachinePointerInfo());
3826 }
3827
3828 // Return R14D, which has the return address. Mark it an implicit live-in.
3829 Register LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3830 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3831}
3832
3833SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3834 SelectionDAG &DAG) const {
3835 SDLoc DL(Op);
3836 SDValue In = Op.getOperand(0);
3837 EVT InVT = In.getValueType();
3838 EVT ResVT = Op.getValueType();
3839
3840 // Convert loads directly. This is normally done by DAGCombiner,
3841 // but we need this case for bitcasts that are created during lowering
3842 // and which are then lowered themselves.
3843 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3844 if (ISD::isNormalLoad(LoadN)) {
3845 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3846 LoadN->getBasePtr(), LoadN->getMemOperand());
3847 // Update the chain uses.
3848 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3849 return NewLoad;
3850 }
3851
3852 if (InVT == MVT::i32 && ResVT == MVT::f32) {
3853 SDValue In64;
3854 if (Subtarget.hasHighWord()) {
3855 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3856 MVT::i64);
3857 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3858 MVT::i64, SDValue(U64, 0), In);
3859 } else {
3860 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3861 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3862 DAG.getConstant(32, DL, MVT::i64));
3863 }
3864 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3865 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3866 DL, MVT::f32, Out64);
3867 }
3868 if (InVT == MVT::f32 && ResVT == MVT::i32) {
3869 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3870 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3871 MVT::f64, SDValue(U64, 0), In);
3872 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3873 if (Subtarget.hasHighWord())
3874 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3875 MVT::i32, Out64);
3876 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3877 DAG.getConstant(32, DL, MVT::i64));
3878 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3879 }
3880 llvm_unreachable("Unexpected bitcast combination");
3881}
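// Note on the subreg choice above: on SystemZ a 32-bit FP value occupies the
// high (left) half of a floating-point register, so an i32<->f32 bitcast goes
// through the high 32 bits of an i64/f64 pair, either directly via subreg_h32
// when high-word support is available or with an explicit 32-bit shift.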
3882
3883SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3884 SelectionDAG &DAG) const {
3885
3886 if (Subtarget.isTargetXPLINK64())
3887 return lowerVASTART_XPLINK(Op, DAG);
3888 else
3889 return lowerVASTART_ELF(Op, DAG);
3890}
3891
3892SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
3893 SelectionDAG &DAG) const {
3894 MachineFunction &MF = DAG.getMachineFunction();
3895 SystemZMachineFunctionInfo *FuncInfo =
3896 MF.getInfo<SystemZMachineFunctionInfo>();
3897
3898 SDLoc DL(Op);
3899
3900 // vastart just stores the address of the VarArgsFrameIndex slot into the
3901 // memory location argument.
3902 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3903 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3904 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3905 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
3906 MachinePointerInfo(SV));
3907}
3908
3909SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
3910 SelectionDAG &DAG) const {
3911 MachineFunction &MF = DAG.getMachineFunction();
3912 SystemZMachineFunctionInfo *FuncInfo =
3913 MF.getInfo<SystemZMachineFunctionInfo>();
3914 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3915
3916 SDValue Chain = Op.getOperand(0);
3917 SDValue Addr = Op.getOperand(1);
3918 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3919 SDLoc DL(Op);
3920
3921 // The initial values of each field.
3922 const unsigned NumFields = 4;
3923 SDValue Fields[NumFields] = {
3924 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3925 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3926 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3927 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3928 };
3929
3930 // Store each field into its respective slot.
3931 SDValue MemOps[NumFields];
3932 unsigned Offset = 0;
3933 for (unsigned I = 0; I < NumFields; ++I) {
3934 SDValue FieldAddr = Addr;
3935 if (Offset != 0)
3936 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3937 DAG.getIntPtrConstant(Offset, DL));
3938 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3939 MachinePointerInfo(SV, Offset));
3940 Offset += 8;
3941 }
3942 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3943}
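// For reference, the four 8-byte fields stored above correspond to the ELF
// s390x va_list layout (__gpr, __fpr, __overflow_arg_area, __reg_save_area)
// at offsets 0, 8, 16 and 24, which is why the loop advances by 8 each time.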
3944
3945SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3946 SelectionDAG &DAG) const {
3947 SDValue Chain = Op.getOperand(0);
3948 SDValue DstPtr = Op.getOperand(1);
3949 SDValue SrcPtr = Op.getOperand(2);
3950 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3951 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3952 SDLoc DL(Op);
3953
3954 uint32_t Sz =
3955 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
3956 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
3957 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3958 /*isTailCall*/ false, MachinePointerInfo(DstSV),
3959 MachinePointerInfo(SrcSV));
3960}
3961
3962SDValue
3963SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
3964 SelectionDAG &DAG) const {
3965 if (Subtarget.isTargetXPLINK64())
3966 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
3967 else
3968 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
3969}
3970
3971SDValue
3972SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
3973 SelectionDAG &DAG) const {
3974 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3975 MachineFunction &MF = DAG.getMachineFunction();
3976 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3977 SDValue Chain = Op.getOperand(0);
3978 SDValue Size = Op.getOperand(1);
3979 SDValue Align = Op.getOperand(2);
3980 SDLoc DL(Op);
3981
3982 // If user has set the no alignment function attribute, ignore
3983 // alloca alignments.
3984 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
3985
3986 uint64_t StackAlign = TFI->getStackAlignment();
3987 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3988 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3989
3990 SDValue NeededSpace = Size;
3991
3992 // Add extra space for alignment if needed.
3993 EVT PtrVT = getPointerTy(MF.getDataLayout());
3994 if (ExtraAlignSpace)
3995 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
3996 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3997
3998 bool IsSigned = false;
3999 bool DoesNotReturn = false;
4000 bool IsReturnValueUsed = false;
4001 EVT VT = Op.getValueType();
4002 SDValue AllocaCall =
4003 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4004 CallingConv::C, IsSigned, DL, DoesNotReturn,
4005 IsReturnValueUsed)
4006 .first;
4007
4008 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4009 // to end of call in order to ensure it isn't broken up from the call
4010 // sequence.
4011 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4012 Register SPReg = Regs.getStackPointerRegister();
4013 Chain = AllocaCall.getValue(1);
4014 SDValue Glue = AllocaCall.getValue(2);
4015 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4016 Chain = NewSPRegNode.getValue(1);
4017
4018 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4019 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4020 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4021
4022 // Dynamically realign if needed.
4023 if (ExtraAlignSpace) {
4024 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4025 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4026 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4027 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4028 }
4029
4030 SDValue Ops[2] = {Result, Chain};
4031 return DAG.getMergeValues(Ops, DL);
4032}
4033
4034SDValue
4035SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4036 SelectionDAG &DAG) const {
4037 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4038 MachineFunction &MF = DAG.getMachineFunction();
4039 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4040 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4041
4042 SDValue Chain = Op.getOperand(0);
4043 SDValue Size = Op.getOperand(1);
4044 SDValue Align = Op.getOperand(2);
4045 SDLoc DL(Op);
4046
4047 // If user has set the no alignment function attribute, ignore
4048 // alloca alignments.
4049 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4050
4051 uint64_t StackAlign = TFI->getStackAlignment();
4052 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4053 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4054
4055 Register SPReg = getStackPointerRegisterToSaveRestore();
4056 SDValue NeededSpace = Size;
4057
4058 // Get a reference to the stack pointer.
4059 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4060
4061 // If we need a backchain, save it now.
4062 SDValue Backchain;
4063 if (StoreBackchain)
4064 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4065 MachinePointerInfo());
4066
4067 // Add extra space for alignment if needed.
4068 if (ExtraAlignSpace)
4069 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4070 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4071
4072 // Get the new stack pointer value.
4073 SDValue NewSP;
4074 if (hasInlineStackProbe(MF)) {
4075 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4076 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4077 Chain = NewSP.getValue(1);
4078 }
4079 else {
4080 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4081 // Copy the new stack pointer back.
4082 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4083 }
4084
4085 // The allocated data lives above the 160 bytes allocated for the standard
4086 // frame, plus any outgoing stack arguments. We don't know how much that
4087 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4088 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4089 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4090
4091 // Dynamically realign if needed.
4092 if (RequiredAlign > StackAlign) {
4093 Result =
4094 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4095 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4096 Result =
4097 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4098 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4099 }
4100
4101 if (StoreBackchain)
4102 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4103 MachinePointerInfo());
4104
4105 SDValue Ops[2] = { Result, Chain };
4106 return DAG.getMergeValues(Ops, DL);
4107}
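// Realignment example (illustrative values): with the default 8-byte stack
// alignment, an alloca requesting 32-byte alignment has ExtraAlignSpace = 24,
// so 24 extra bytes are allocated; adding 24 to the ADJDYNALLOC-adjusted
// result and masking with ~31 then rounds it up to the next 32-byte boundary
// within that extra space.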
4108
4109SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4110 SDValue Op, SelectionDAG &DAG) const {
4111 SDLoc DL(Op);
4112
4113 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4114}
4115
4116SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4117 SelectionDAG &DAG) const {
4118 EVT VT = Op.getValueType();
4119 SDLoc DL(Op);
4120 SDValue Ops[2];
4121 if (is32Bit(VT))
4122 // Just do a normal 64-bit multiplication and extract the results.
4123 // We define this so that it can be used for constant division.
4124 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4125 Op.getOperand(1), Ops[1], Ops[0]);
4126 else if (Subtarget.hasMiscellaneousExtensions2())
4127 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4128 // the high result in the even register. ISD::SMUL_LOHI is defined to
4129 // return the low half first, so the results are in reverse order.
4130 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4131 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4132 else {
4133 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4134 //
4135 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4136 //
4137 // but using the fact that the upper halves are either all zeros
4138 // or all ones:
4139 //
4140 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4141 //
4142 // and grouping the right terms together since they are quicker than the
4143 // multiplication:
4144 //
4145 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
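// (This rewrite works because lh and rh are each either 0 or all-ones: for
// example, if lh is all-ones then lh * rl equals -rl while lh & rl equals rl,
// so the signed correction term can be formed with an AND and subtracted.)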
4146 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4147 SDValue LL = Op.getOperand(0);
4148 SDValue RL = Op.getOperand(1);
4149 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4150 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4151 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4152 // the high result in the even register. ISD::SMUL_LOHI is defined to
4153 // return the low half first, so the results are in reverse order.
4154 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4155 LL, RL, Ops[1], Ops[0]);
4156 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4157 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4158 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4159 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4160 }
4161 return DAG.getMergeValues(Ops, DL);
4162}
4163
4164SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4165 SelectionDAG &DAG) const {
4166 EVT VT = Op.getValueType();
4167 SDLoc DL(Op);
4168 SDValue Ops[2];
4169 if (is32Bit(VT))
4170 // Just do a normal 64-bit multiplication and extract the results.
4171 // We define this so that it can be used for constant division.
4172 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4173 Op.getOperand(1), Ops[1], Ops[0]);
4174 else
4175 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4176 // the high result in the even register. ISD::UMUL_LOHI is defined to
4177 // return the low half first, so the results are in reverse order.
4178 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4179 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4180 return DAG.getMergeValues(Ops, DL);
4181}
4182
4183SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4184 SelectionDAG &DAG) const {
4185 SDValue Op0 = Op.getOperand(0);
4186 SDValue Op1 = Op.getOperand(1);
4187 EVT VT = Op.getValueType();
4188 SDLoc DL(Op);
4189
4190 // We use DSGF for 32-bit division. This means the first operand must
4191 // always be 64-bit, and the second operand should be 32-bit whenever
4192 // that is possible, to improve performance.
4193 if (is32Bit(VT))
4194 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4195 else if (DAG.ComputeNumSignBits(Op1) > 32)
4196 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4197
4198 // DSG(F) returns the remainder in the even register and the
4199 // quotient in the odd register.
4200 SDValue Ops[2];
4201 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4202 return DAG.getMergeValues(Ops, DL);
4203}
4204
4205SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4206 SelectionDAG &DAG) const {
4207 EVT VT = Op.getValueType();
4208 SDLoc DL(Op);
4209
4210 // DL(G) returns the remainder in the even register and the
4211 // quotient in the odd register.
4212 SDValue Ops[2];
4213 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4214 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4215 return DAG.getMergeValues(Ops, DL);
4216}
4217
4218SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4219 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4220
4221 // Get the known-zero masks for each operand.
4222 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4223 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4224 DAG.computeKnownBits(Ops[1])};
4225
4226 // See if the upper 32 bits of one operand and the lower 32 bits of the
4227 // other are known zero. They are the low and high operands respectively.
4228 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4229 Known[1].Zero.getZExtValue() };
4230 unsigned High, Low;
4231 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4232 High = 1, Low = 0;
4233 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4234 High = 0, Low = 1;
4235 else
4236 return Op;
4237
4238 SDValue LowOp = Ops[Low];
4239 SDValue HighOp = Ops[High];
4240
4241 // If the high part is a constant, we're better off using IILH.
4242 if (HighOp.getOpcode() == ISD::Constant)
4243 return Op;
4244
4245 // If the low part is a constant that is outside the range of LHI,
4246 // then we're better off using IILF.
4247 if (LowOp.getOpcode() == ISD::Constant) {
4248 int64_t Value = int32_t(LowOp->getAsZExtVal());
4249 if (!isInt<16>(Value))
4250 return Op;
4251 }
4252
4253 // Check whether the high part is an AND that doesn't change the
4254 // high 32 bits and just masks out low bits. We can skip it if so.
4255 if (HighOp.getOpcode() == ISD::AND &&
4256 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4257 SDValue HighOp0 = HighOp.getOperand(0);
4258 uint64_t Mask = HighOp.getConstantOperandVal(1);
4259 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4260 HighOp = HighOp0;
4261 }
4262
4263 // Take advantage of the fact that all GR32 operations only change the
4264 // low 32 bits by truncating Low to an i32 and inserting it directly
4265 // using a subreg. The interesting cases are those where the truncation
4266 // can be folded.
4267 SDLoc DL(Op);
4268 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4269 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4270 MVT::i64, HighOp, Low32);
4271}
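// Illustrative case of the transformation above: an i64 "or" of a value whose
// low 32 bits are known zero with a zero-extended i32 value Y becomes an
// insertion of Y into subreg_l32 of the high operand; the TRUNCATE folds away
// the zero-extension, so no separate 64-bit OR is needed.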
4272
4273// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4274SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4275 SelectionDAG &DAG) const {
4276 SDNode *N = Op.getNode();
4277 SDValue LHS = N->getOperand(0);
4278 SDValue RHS = N->getOperand(1);
4279 SDLoc DL(N);
4280
4281 if (N->getValueType(0) == MVT::i128) {
4282 unsigned BaseOp = 0;
4283 unsigned FlagOp = 0;
4284 bool IsBorrow = false;
4285 switch (Op.getOpcode()) {
4286 default: llvm_unreachable("Unknown instruction!");
4287 case ISD::UADDO:
4288 BaseOp = ISD::ADD;
4289 FlagOp = SystemZISD::VACC;
4290 break;
4291 case ISD::USUBO:
4292 BaseOp = ISD::SUB;
4293 FlagOp = SystemZISD::VSCBI;
4294 IsBorrow = true;
4295 break;
4296 }
4297 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4298 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4299 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4300 DAG.getValueType(MVT::i1));
4301 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4302 if (IsBorrow)
4303 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4304 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4305 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4306 }
4307
4308 unsigned BaseOp = 0;
4309 unsigned CCValid = 0;
4310 unsigned CCMask = 0;
4311
4312 switch (Op.getOpcode()) {
4313 default: llvm_unreachable("Unknown instruction!");
4314 case ISD::SADDO:
4315 BaseOp = SystemZISD::SADDO;
4316 CCValid = SystemZ::CCMASK_ARITH;
4317 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4318 break;
4319 case ISD::SSUBO:
4320 BaseOp = SystemZISD::SSUBO;
4321 CCValid = SystemZ::CCMASK_ARITH;
4322 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4323 break;
4324 case ISD::UADDO:
4325 BaseOp = SystemZISD::UADDO;
4326 CCValid = SystemZ::CCMASK_LOGICAL;
4327 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4328 break;
4329 case ISD::USUBO:
4330 BaseOp = SystemZISD::USUBO;
4331 CCValid = SystemZ::CCMASK_LOGICAL;
4332 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4333 break;
4334 }
4335
4336 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4337 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4338
4339 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4340 if (N->getValueType(1) == MVT::i1)
4341 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4342
4343 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4344}
4345
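// The two helpers below check that a carry/borrow value ultimately originates
// from a UADDO/USUBO node (possibly through a chain of UADDO_CARRY or
// USUBO_CARRY nodes), i.e. that it really is the CC-based carry produced by a
// logical add/subtract rather than an arbitrary boolean value.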
4346static bool isAddCarryChain(SDValue Carry) {
4347 while (Carry.getOpcode() == ISD::UADDO_CARRY)
4348 Carry = Carry.getOperand(2);
4349 return Carry.getOpcode() == ISD::UADDO;
4350}
4351
4352static bool isSubBorrowChain(SDValue Carry) {
4353 while (Carry.getOpcode() == ISD::USUBO_CARRY)
4354 Carry = Carry.getOperand(2);
4355 return Carry.getOpcode() == ISD::USUBO;
4356}
4357
4358// Lower UADDO_CARRY/USUBO_CARRY nodes.
4359SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4360 SelectionDAG &DAG) const {
4361
4362 SDNode *N = Op.getNode();
4363 MVT VT = N->getSimpleValueType(0);
4364
4365 // Let legalize expand this if it isn't a legal type yet.
4366 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4367 return SDValue();
4368
4369 SDValue LHS = N->getOperand(0);
4370 SDValue RHS = N->getOperand(1);
4371 SDValue Carry = Op.getOperand(2);
4372 SDLoc DL(N);
4373
4374 if (VT == MVT::i128) {
4375 unsigned BaseOp = 0;
4376 unsigned FlagOp = 0;
4377 bool IsBorrow = false;
4378 switch (Op.getOpcode()) {
4379 default: llvm_unreachable("Unknown instruction!");
4380 case ISD::UADDO_CARRY:
4381 BaseOp = SystemZISD::VAC;
4382 FlagOp = SystemZISD::VACCC;
4383 break;
4384 case ISD::USUBO_CARRY:
4385 BaseOp = SystemZISD::VSBI;
4386 FlagOp = SystemZISD::VSBCBI;
4387 IsBorrow = true;
4388 break;
4389 }
4390 if (IsBorrow)
4391 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4392 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4393 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4394 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4395 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4396 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4397 DAG.getValueType(MVT::i1));
4398 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4399 if (IsBorrow)
4400 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4401 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4402 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4403 }
4404
4405 unsigned BaseOp = 0;
4406 unsigned CCValid = 0;
4407 unsigned CCMask = 0;
4408
4409 switch (Op.getOpcode()) {
4410 default: llvm_unreachable("Unknown instruction!");
4411 case ISD::UADDO_CARRY:
4412 if (!isAddCarryChain(Carry))
4413 return SDValue();
4414
4415 BaseOp = SystemZISD::ADDCARRY;
4416 CCValid = SystemZ::CCMASK_LOGICAL;
4417 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4418 break;
4419 case ISD::USUBO_CARRY:
4420 if (!isSubBorrowChain(Carry))
4421 return SDValue();
4422
4423 BaseOp = SystemZISD::SUBCARRY;
4424 CCValid = SystemZ::CCMASK_LOGICAL;
4425 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4426 break;
4427 }
4428
4429 // Set the condition code from the carry flag.
4430 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4431 DAG.getConstant(CCValid, DL, MVT::i32),
4432 DAG.getConstant(CCMask, DL, MVT::i32));
4433
4434 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4435 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4436
4437 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4438 if (N->getValueType(1) == MVT::i1)
4439 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4440
4441 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4442}
4443
4444SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4445 SelectionDAG &DAG) const {
4446 EVT VT = Op.getValueType();
4447 SDLoc DL(Op);
4448 Op = Op.getOperand(0);
4449
4450 if (VT.getScalarSizeInBits() == 128) {
4451 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4452 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4453 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4454 DAG.getConstant(0, DL, MVT::i64));
4455 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4456 return Op;
4457 }
4458
4459 // Handle vector types via VPOPCT.
4460 if (VT.isVector()) {
4461 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4462 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4463 switch (VT.getScalarSizeInBits()) {
4464 case 8:
4465 break;
4466 case 16: {
4467 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4468 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4469 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4470 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4471 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4472 break;
4473 }
4474 case 32: {
4475 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4476 DAG.getConstant(0, DL, MVT::i32));
4477 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4478 break;
4479 }
4480 case 64: {
4481 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4482 DAG.getConstant(0, DL, MVT::i32));
4483 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4484 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4485 break;
4486 }
4487 default:
4488 llvm_unreachable("Unexpected type");
4489 }
4490 return Op;
4491 }
4492
4493 // Get the known-zero mask for the operand.
4494 KnownBits Known = DAG.computeKnownBits(Op);
4495 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
4496 if (NumSignificantBits == 0)
4497 return DAG.getConstant(0, DL, VT);
4498
4499 // Skip known-zero high parts of the operand.
4500 int64_t OrigBitSize = VT.getSizeInBits();
4501 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
4502 BitSize = std::min(BitSize, OrigBitSize);
4503
4504 // The POPCNT instruction counts the number of bits in each byte.
4505 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
4506 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
4507 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4508
4509 // Add up per-byte counts in a binary tree. All bits of Op at
4510 // position larger than BitSize remain zero throughout.
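// For example, with a fully significant 32-bit value the loop runs for I = 16
// and I = 8: the first step adds each byte's count to the byte two positions
// above it, the second adds the adjacent pair, leaving the total in the most
// significant byte, which the shift right by BitSize - 8 (24 here) extracts.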
4511 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
4512 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
4513 if (BitSize != OrigBitSize)
4514 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
4515 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
4516 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4517 }
4518
4519 // Extract overall result from high byte.
4520 if (BitSize > 8)
4521 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4522 DAG.getConstant(BitSize - 8, DL, VT));
4523
4524 return Op;
4525}
4526
4527SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4528 SelectionDAG &DAG) const {
4529 SDLoc DL(Op);
4530 AtomicOrdering FenceOrdering =
4531 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4532 SyncScope::ID FenceSSID =
4533 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4534
4535 // The only fence that needs an instruction is a sequentially-consistent
4536 // cross-thread fence.
4537 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4538 FenceSSID == SyncScope::System) {
4539 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
4540 Op.getOperand(0)),
4541 0);
4542 }
4543
4544 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4545 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4546}
4547
4548SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
4549 SelectionDAG &DAG) const {
4550 auto *Node = cast<AtomicSDNode>(Op.getNode());
4551 assert(Node->getMemoryVT() == MVT::i128 && "Only custom lowering i128.");
4552 // Use same code to handle both legal and non-legal i128 types.
4553 SmallVector<SDValue, 2> Results;
4554 LowerOperationWrapper(Node, Results, DAG);
4555 return DAG.getMergeValues(Results, SDLoc(Op));
4556}
4557
4558// Prepare for a Compare And Swap for a subword operation. This needs to be
4559 // done in memory with 4 bytes at natural alignment.
4560 static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
4561 SDValue &AlignedAddr, SDValue &BitShift,
4562 SDValue &NegBitShift) {
4563 EVT PtrVT = Addr.getValueType();
4564 EVT WideVT = MVT::i32;
4565
4566 // Get the address of the containing word.
4567 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4568 DAG.getConstant(-4, DL, PtrVT));
4569
4570 // Get the number of bits that the word must be rotated left in order
4571 // to bring the field to the top bits of a GR32.
4572 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4573 DAG.getConstant(3, DL, PtrVT));
4574 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4575
4576 // Get the complementing shift amount, for rotating a field in the top
4577 // bits back to its proper position.
4578 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4579 DAG.getConstant(0, DL, WideVT), BitShift);
4580
4581}
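// Worked example (illustrative address): for a halfword at byte offset 2
// within its aligned word, AlignedAddr clears the low two address bits and
// BitShift is Addr * 8 truncated to i32, whose low bits give an effective
// rotate of 16; rotating the containing word left by 16 brings the halfword
// (big-endian) into the top bits, and NegBitShift rotates it back afterwards.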
4582
4583// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
4584// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
4585SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
4586 SelectionDAG &DAG,
4587 unsigned Opcode) const {
4588 auto *Node = cast<AtomicSDNode>(Op.getNode());
4589
4590 // 32-bit operations need no special handling.
4591 EVT NarrowVT = Node->getMemoryVT();
4592 EVT WideVT = MVT::i32;
4593 if (NarrowVT == WideVT)
4594 return Op;
4595
4596 int64_t BitSize = NarrowVT.getSizeInBits();
4597 SDValue ChainIn = Node->getChain();
4598 SDValue Addr = Node->getBasePtr();
4599 SDValue Src2 = Node->getVal();
4600 MachineMemOperand *MMO = Node->getMemOperand();
4601 SDLoc DL(Node);
4602
4603 // Convert atomic subtracts of constants into additions.
4604 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4605 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4606 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
4607 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
4608 }
4609
4610 SDValue AlignedAddr, BitShift, NegBitShift;
4611 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4612
4613 // Extend the source operand to 32 bits and prepare it for the inner loop.
4614 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
4615 // operations require the source to be shifted in advance. (This shift
4616 // can be folded if the source is constant.) For AND and NAND, the lower
4617 // bits must be set, while for other opcodes they should be left clear.
4618 if (Opcode != SystemZISD::ATOMIC_SWAPW)
4619 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
4620 DAG.getConstant(32 - BitSize, DL, WideVT));
4621 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
4622 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
4623 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
4624 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
4625
4626 // Construct the ATOMIC_LOADW_* node.
4627 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
4628 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4629 DAG.getConstant(BitSize, DL, WideVT) };
4630 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4631 NarrowVT, MMO);
4632
4633 // Rotate the result of the final CS so that the field is in the lower
4634 // bits of a GR32, then truncate it.
4635 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
4636 DAG.getConstant(BitSize, DL, WideVT));
4637 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
4638
4639 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
4640 return DAG.getMergeValues(RetOps, DL);
4641}
4642
4643// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
4644// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
4645SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
4646 SelectionDAG &DAG) const {
4647 auto *Node = cast<AtomicSDNode>(Op.getNode());
4648 EVT MemVT = Node->getMemoryVT();
4649 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
4650 // A full-width operation: negate and use LAA(G).
4651 assert(Op.getValueType() == MemVT && "Mismatched VTs");
4652 assert(Subtarget.hasInterlockedAccess1() &&
4653 "Should have been expanded by AtomicExpand pass.");
4654 SDValue Src2 = Node->getVal();
4655 SDLoc DL(Src2);
4656 SDValue NegSrc2 =
4657 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
4658 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
4659 Node->getChain(), Node->getBasePtr(), NegSrc2,
4660 Node->getMemOperand());
4661 }
4662
4663 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
4664}
4665
4666// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
4667SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
4668 SelectionDAG &DAG) const {
4669 auto *Node = cast<AtomicSDNode>(Op.getNode());
4670 SDValue ChainIn = Node->getOperand(0);
4671 SDValue Addr = Node->getOperand(1);
4672 SDValue CmpVal = Node->getOperand(2);
4673 SDValue SwapVal = Node->getOperand(3);
4674 MachineMemOperand *MMO = Node->getMemOperand();
4675 SDLoc DL(Node);
4676
4677 if (Node->getMemoryVT() == MVT::i128) {
4678 // Use same code to handle both legal and non-legal i128 types.
4679 SmallVector<SDValue, 3> Results;
4680 LowerOperationWrapper(Node, Results, DAG);
4681 return DAG.getMergeValues(Results, DL);
4682 }
4683
4684 // We have native support for 32-bit and 64-bit compare and swap, but we
4685 // still need to expand extracting the "success" result from the CC.
4686 EVT NarrowVT = Node->getMemoryVT();
4687 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4688 if (NarrowVT == WideVT) {
4689 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4690 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
4691 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
4692 DL, Tys, Ops, NarrowVT, MMO);
4693 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4694 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
4695
4696 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4697 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4698 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4699 return SDValue();
4700 }
4701
4702 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4703 // via a fullword ATOMIC_CMP_SWAPW operation.
4704 int64_t BitSize = NarrowVT.getSizeInBits();
4705
4706 SDValue AlignedAddr, BitShift, NegBitShift;
4707 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4708
4709 // Construct the ATOMIC_CMP_SWAPW node.
4710 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4711 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
4712 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
4713 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
4714 VTList, Ops, NarrowVT, MMO);
4715 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4716 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
4717
4718 // emitAtomicCmpSwapW() will zero extend the result (original value).
4719 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
4720 DAG.getValueType(NarrowVT));
4721 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
4722 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4723 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4724 return SDValue();
4725}
4726
4727 MachineMemOperand::Flags
4728 SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
4729 // Because of how we convert atomic_load and atomic_store to normal loads and
4730 // stores in the DAG, we need to ensure that the MMOs are marked volatile
4731 // since DAGCombine hasn't been updated to account for atomic, but non
4732 // volatile loads. (See D57601)
4733 if (auto *SI = dyn_cast<StoreInst>(&I))
4734 if (SI->isAtomic())
4735 return MachineMemOperand::MOVolatile;
4736 if (auto *LI = dyn_cast<LoadInst>(&I))
4737 if (LI->isAtomic())
4738 return MachineMemOperand::MOVolatile;
4739 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
4740 if (AI->isAtomic())
4741 return MachineMemOperand::MOVolatile;
4742 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
4743 if (AI->isAtomic())
4744 return MachineMemOperand::MOVolatile;
4745 return MachineMemOperand::MONone;
4746 }
4747
4748SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
4749 SelectionDAG &DAG) const {
4750 MachineFunction &MF = DAG.getMachineFunction();
4751 auto *Regs = Subtarget.getSpecialRegisters();
4752 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4753 report_fatal_error("Variable-sized stack allocations are not supported "
4754 "in GHC calling convention");
4755 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
4756 Regs->getStackPointerRegister(), Op.getValueType());
4757}
4758
4759SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
4760 SelectionDAG &DAG) const {
4761 MachineFunction &MF = DAG.getMachineFunction();
4762 auto *Regs = Subtarget.getSpecialRegisters();
4763 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4764
4765 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4766 report_fatal_error("Variable-sized stack allocations are not supported "
4767 "in GHC calling convention");
4768
4769 SDValue Chain = Op.getOperand(0);
4770 SDValue NewSP = Op.getOperand(1);
4771 SDValue Backchain;
4772 SDLoc DL(Op);
4773
4774 if (StoreBackchain) {
4775 SDValue OldSP = DAG.getCopyFromReg(
4776 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
4777 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4778 MachinePointerInfo());
4779 }
4780
4781 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
4782
4783 if (StoreBackchain)
4784 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4785 MachinePointerInfo());
4786
4787 return Chain;
4788}
4789
4790SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
4791 SelectionDAG &DAG) const {
4792 bool IsData = Op.getConstantOperandVal(4);
4793 if (!IsData)
4794 // Just preserve the chain.
4795 return Op.getOperand(0);
4796
4797 SDLoc DL(Op);
4798 bool IsWrite = Op.getConstantOperandVal(2);
4799 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
4800 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
4801 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
4802 Op.getOperand(1)};
4803 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
4804 Node->getVTList(), Ops,
4805 Node->getMemoryVT(), Node->getMemOperand());
4806}
4807
4808 // Convert condition code in CCReg to an i32 value.
4809 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
4810 SDLoc DL(CCReg);
4811 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
4812 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
4813 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
4814}
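// IPM places the condition code in bits 28-29 of the 32-bit result (counting
// from the least significant bit), so the shift by SystemZ::IPM_CC above
// leaves the raw CC value 0-3 in the low two bits.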
4815
4816SDValue
4817SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4818 SelectionDAG &DAG) const {
4819 unsigned Opcode, CCValid;
4820 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
4821 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
4822 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
4823 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
4824 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
4825 return SDValue();
4826 }
4827
4828 return SDValue();
4829}
4830
4831SDValue
4832SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4833 SelectionDAG &DAG) const {
4834 unsigned Opcode, CCValid;
4835 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
4836 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
4837 if (Op->getNumValues() == 1)
4838 return getCCResult(DAG, SDValue(Node, 0));
4839 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
4840 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
4841 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
4842 }
4843
4844 unsigned Id = Op.getConstantOperandVal(0);
4845 switch (Id) {
4846 case Intrinsic::thread_pointer:
4847 return lowerThreadPointer(SDLoc(Op), DAG);
4848
4849 case Intrinsic::s390_vpdi:
4850 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
4851 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4852
4853 case Intrinsic::s390_vperm:
4854 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
4855 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4856
4857 case Intrinsic::s390_vuphb:
4858 case Intrinsic::s390_vuphh:
4859 case Intrinsic::s390_vuphf:
4860 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
4861 Op.getOperand(1));
4862
4863 case Intrinsic::s390_vuplhb:
4864 case Intrinsic::s390_vuplhh:
4865 case Intrinsic::s390_vuplhf:
4866 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
4867 Op.getOperand(1));
4868
4869 case Intrinsic::s390_vuplb:
4870 case Intrinsic::s390_vuplhw:
4871 case Intrinsic::s390_vuplf:
4872 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
4873 Op.getOperand(1));
4874
4875 case Intrinsic::s390_vupllb:
4876 case Intrinsic::s390_vupllh:
4877 case Intrinsic::s390_vupllf:
4878 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
4879 Op.getOperand(1));
4880
4881 case Intrinsic::s390_vsumb:
4882 case Intrinsic::s390_vsumh:
4883 case Intrinsic::s390_vsumgh:
4884 case Intrinsic::s390_vsumgf:
4885 case Intrinsic::s390_vsumqf:
4886 case Intrinsic::s390_vsumqg:
4887 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
4888 Op.getOperand(1), Op.getOperand(2));
4889
4890 case Intrinsic::s390_vaq:
4891 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
4892 Op.getOperand(1), Op.getOperand(2));
4893 case Intrinsic::s390_vaccb:
4894 case Intrinsic::s390_vacch:
4895 case Intrinsic::s390_vaccf:
4896 case Intrinsic::s390_vaccg:
4897 case Intrinsic::s390_vaccq:
4898 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
4899 Op.getOperand(1), Op.getOperand(2));
4900 case Intrinsic::s390_vacq:
4901 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
4902 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4903 case Intrinsic::s390_vacccq:
4904 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
4905 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4906
4907 case Intrinsic::s390_vsq:
4908 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
4909 Op.getOperand(1), Op.getOperand(2));
4910 case Intrinsic::s390_vscbib:
4911 case Intrinsic::s390_vscbih:
4912 case Intrinsic::s390_vscbif:
4913 case Intrinsic::s390_vscbig:
4914 case Intrinsic::s390_vscbiq:
4915 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
4916 Op.getOperand(1), Op.getOperand(2));
4917 case Intrinsic::s390_vsbiq:
4918 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
4919 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4920 case Intrinsic::s390_vsbcbiq:
4921 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
4922 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4923 }
4924
4925 return SDValue();
4926}
4927
4928namespace {
4929// Says that SystemZISD operation Opcode can be used to perform the equivalent
4930// of a VPERM with permute vector Bytes. If Opcode takes three operands,
4931// Operand is the constant third operand, otherwise it is the number of
4932// bytes in each element of the result.
4933struct Permute {
4934 unsigned Opcode;
4935 unsigned Operand;
4936 unsigned char Bytes[SystemZ::VectorBytes];
4937};
4938}
4939
4940static const Permute PermuteForms[] = {
4941 // VMRHG
4942 { SystemZISD::MERGE_HIGH, 8,
4943 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4944 // VMRHF
4945 { SystemZISD::MERGE_HIGH, 4,
4946 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4947 // VMRHH
4948 { SystemZISD::MERGE_HIGH, 2,
4949 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4950 // VMRHB
4951 { SystemZISD::MERGE_HIGH, 1,
4952 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4953 // VMRLG
4954 { SystemZISD::MERGE_LOW, 8,
4955 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4956 // VMRLF
4957 { SystemZISD::MERGE_LOW, 4,
4958 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4959 // VMRLH
4960 { SystemZISD::MERGE_LOW, 2,
4961 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4962 // VMRLB
4963 { SystemZISD::MERGE_LOW, 1,
4964 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4965 // VPKG
4966 { SystemZISD::PACK, 4,
4967 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4968 // VPKF
4969 { SystemZISD::PACK, 2,
4970 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4971 // VPKH
4972 { SystemZISD::PACK, 1,
4973 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4974 // VPDI V1, V2, 4 (low half of V1, high half of V2)
4975 { SystemZISD::PERMUTE_DWORDS, 4,
4976 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4977 // VPDI V1, V2, 1 (high half of V1, low half of V2)
4978 { SystemZISD::PERMUTE_DWORDS, 1,
4979 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4980};
4981
4982// Called after matching a vector shuffle against a particular pattern.
4983// Both the original shuffle and the pattern have two vector operands.
4984// OpNos[0] is the operand of the original shuffle that should be used for
4985// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4986// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
4987// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4988// for operands 0 and 1 of the pattern.
4989static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
4990 if (OpNos[0] < 0) {
4991 if (OpNos[1] < 0)
4992 return false;
4993 OpNo0 = OpNo1 = OpNos[1];
4994 } else if (OpNos[1] < 0) {
4995 OpNo0 = OpNo1 = OpNos[0];
4996 } else {
4997 OpNo0 = OpNos[0];
4998 OpNo1 = OpNos[1];
4999 }
5000 return true;
5001}
5002
5003// Bytes is a VPERM-like permute vector, except that -1 is used for
5004// undefined bytes. Return true if the VPERM can be implemented using P.
5005// When returning true set OpNo0 to the VPERM operand that should be
5006// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5007//
5008// For example, if swapping the VPERM operands allows P to match, OpNo0
5009// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5010// operand, but rewriting it to use two duplicated operands allows it to
5011// match P, then OpNo0 and OpNo1 will be the same.
5012static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5013 unsigned &OpNo0, unsigned &OpNo1) {
5014 int OpNos[] = { -1, -1 };
5015 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5016 int Elt = Bytes[I];
5017 if (Elt >= 0) {
5018 // Make sure that the two permute vectors use the same suboperand
5019 // byte number. Only the operand numbers (the high bits) are
5020 // allowed to differ.
5021 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5022 return false;
5023 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5024 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5025 // Make sure that the operand mappings are consistent with previous
5026 // elements.
5027 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5028 return false;
5029 OpNos[ModelOpNo] = RealOpNo;
5030 }
5031 }
5032 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5033}
5034
5035// As above, but search for a matching permute.
5036static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5037 unsigned &OpNo0, unsigned &OpNo1) {
5038 for (auto &P : PermuteForms)
5039 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5040 return &P;
5041 return nullptr;
5042}
5043
5044// Bytes is a VPERM-like permute vector, except that -1 is used for
5045// undefined bytes. This permute is an operand of an outer permute.
5046// See whether redistributing the -1 bytes gives a shuffle that can be
5047// implemented using P. If so, set Transform to a VPERM-like permute vector
5048 // that, when applied to the result of P, gives the original permute in Bytes.
5049 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5050 const Permute &P,
5051 SmallVectorImpl<int> &Transform) {
5052 unsigned To = 0;
5053 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5054 int Elt = Bytes[From];
5055 if (Elt < 0)
5056 // Byte number From of the result is undefined.
5057 Transform[From] = -1;
5058 else {
5059 while (P.Bytes[To] != Elt) {
5060 To += 1;
5061 if (To == SystemZ::VectorBytes)
5062 return false;
5063 }
5064 Transform[From] = To;
5065 }
5066 }
5067 return true;
5068}
5069
5070// As above, but search for a matching permute.
5071static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5072 SmallVectorImpl<int> &Transform) {
5073 for (auto &P : PermuteForms)
5074 if (matchDoublePermute(Bytes, P, Transform))
5075 return &P;
5076 return nullptr;
5077}
5078
5079// Convert the mask of the given shuffle op into a byte-level mask,
5080// as if it had type vNi8.
5081static bool getVPermMask(SDValue ShuffleOp,
5082 SmallVectorImpl<int> &Bytes) {
5083 EVT VT = ShuffleOp.getValueType();
5084 unsigned NumElements = VT.getVectorNumElements();
5085 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5086
5087 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5088 Bytes.resize(NumElements * BytesPerElement, -1);
5089 for (unsigned I = 0; I < NumElements; ++I) {
5090 int Index = VSN->getMaskElt(I);
5091 if (Index >= 0)
5092 for (unsigned J = 0; J < BytesPerElement; ++J)
5093 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5094 }
5095 return true;
5096 }
5097 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5098 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5099 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5100 Bytes.resize(NumElements * BytesPerElement, -1);
5101 for (unsigned I = 0; I < NumElements; ++I)
5102 for (unsigned J = 0; J < BytesPerElement; ++J)
5103 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5104 return true;
5105 }
5106 return false;
5107}
5108
5109// Bytes is a VPERM-like permute vector, except that -1 is used for
5110// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5111// the result come from a contiguous sequence of bytes from one input.
5112// Set Base to the selector for the first byte if so.
5113static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5114 unsigned BytesPerElement, int &Base) {
5115 Base = -1;
5116 for (unsigned I = 0; I < BytesPerElement; ++I) {
5117 if (Bytes[Start + I] >= 0) {
5118 unsigned Elem = Bytes[Start + I];
5119 if (Base < 0) {
5120 Base = Elem - I;
5121 // Make sure the bytes would come from one input operand.
5122 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5123 return false;
5124 } else if (unsigned(Base) != Elem - I)
5125 return false;
5126 }
5127 }
5128 return true;
5129}
5130
5131// Bytes is a VPERM-like permute vector, except that -1 is used for
5132// undefined bytes. Return true if it can be performed using VSLDB.
5133// When returning true, set StartIndex to the shift amount and OpNo0
5134// and OpNo1 to the VPERM operands that should be used as the first
5135 // and second shift operand respectively.
5136 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5137 unsigned &StartIndex, unsigned &OpNo0,
5138 unsigned &OpNo1) {
5139 int OpNos[] = { -1, -1 };
5140 int Shift = -1;
5141 for (unsigned I = 0; I < 16; ++I) {
5142 int Index = Bytes[I];
5143 if (Index >= 0) {
5144 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5145 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5146 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5147 if (Shift < 0)
5148 Shift = ExpectedShift;
5149 else if (Shift != ExpectedShift)
5150 return false;
5151 // Make sure that the operand mappings are consistent with previous
5152 // elements.
5153 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5154 return false;
5155 OpNos[ModelOpNo] = RealOpNo;
5156 }
5157 }
5158 StartIndex = Shift;
5159 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5160}
5161
5162// Create a node that performs P on operands Op0 and Op1, casting the
5163// operands to the appropriate type. The type of the result is determined by P.
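// For example, a PACK form with P.Operand == 1 casts both operands to v8i16
// and produces a v16i8 result (illustrative; the widths follow from the
// calculation below).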
5164static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5165 const Permute &P, SDValue Op0, SDValue Op1) {
5166 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5167 // elements of a PACK are twice as wide as the outputs.
5168 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5169 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5170 P.Operand);
5171 // Cast both operands to the appropriate type.
5172 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5173 SystemZ::VectorBytes / InBytes);
5174 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5175 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5176 SDValue Op;
5177 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5178 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5179 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5180 } else if (P.Opcode == SystemZISD::PACK) {
5181 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5182 SystemZ::VectorBytes / P.Operand);
5183 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5184 } else {
5185 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5186 }
5187 return Op;
5188}
5189
5190static bool isZeroVector(SDValue N) {
5191 if (N->getOpcode() == ISD::BITCAST)
5192 N = N->getOperand(0);
5193 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5194 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5195 return Op->getZExtValue() == 0;
5196 return ISD::isBuildVectorAllZeros(N.getNode());
5197}
5198
5199// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5200static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5201 for (unsigned I = 0; I < Num ; I++)
5202 if (isZeroVector(Ops[I]))
5203 return I;
5204 return UINT32_MAX;
5205}
5206
5207// Bytes is a VPERM-like permute vector, except that -1 is used for
5208// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5209// VSLDB or VPERM.
5210static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5211 SDValue *Ops,
5212 const SmallVectorImpl<int> &Bytes) {
5213 for (unsigned I = 0; I < 2; ++I)
5214 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5215
5216 // First see whether VSLDB can be used.
5217 unsigned StartIndex, OpNo0, OpNo1;
5218 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5219 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5220 Ops[OpNo1],
5221 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5222
5223 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5224 // eliminate a zero vector by reusing any zero index in the permute vector.
5225 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5226 if (ZeroVecIdx != UINT32_MAX) {
5227 bool MaskFirst = true;
5228 int ZeroIdx = -1;
5229 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5230 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5231 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5232 if (OpNo == ZeroVecIdx && I == 0) {
5233 // If the first byte is zero, use mask as first operand.
5234 ZeroIdx = 0;
5235 break;
5236 }
5237 if (OpNo != ZeroVecIdx && Byte == 0) {
5238 // If mask contains a zero, use it by placing that vector first.
5239 ZeroIdx = I + SystemZ::VectorBytes;
5240 MaskFirst = false;
5241 break;
5242 }
5243 }
5244 if (ZeroIdx != -1) {
5245 SDValue IndexNodes[SystemZ::VectorBytes];
5246 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5247 if (Bytes[I] >= 0) {
5248 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5249 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5250 if (OpNo == ZeroVecIdx)
5251 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5252 else {
5253 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5254 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5255 }
5256 } else
5257 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5258 }
5259 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5260 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5261 if (MaskFirst)
5262 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5263 Mask);
5264 else
5265 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5266 Mask);
5267 }
5268 }
5269
5270 SDValue IndexNodes[SystemZ::VectorBytes];
5271 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5272 if (Bytes[I] >= 0)
5273 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5274 else
5275 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5276 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5277 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5278 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5279}
5280
5281namespace {
5282// Describes a general N-operand vector shuffle.
5283struct GeneralShuffle {
5284 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
5285 void addUndef();
5286 bool add(SDValue, unsigned);
5287 SDValue getNode(SelectionDAG &, const SDLoc &);
5288 void tryPrepareForUnpack();
5289 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5290 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5291
5292 // The operands of the shuffle.
5293 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5294
5295 // Index I is -1 if byte I of the result is undefined. Otherwise the
5296 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5297 // Bytes[I] / SystemZ::VectorBytes.
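// (E.g. a value of 21 selects byte 5 of operand 1.)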
5298 SmallVector<int, SystemZ::VectorBytes> Bytes;
5299
5300 // The type of the shuffle result.
5301 EVT VT;
5302
5303 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5304 unsigned UnpackFromEltSize;
5305};
5306}
5307
5308// Add an extra undefined element to the shuffle.
5309void GeneralShuffle::addUndef() {
5310 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5311 for (unsigned I = 0; I < BytesPerElement; ++I)
5312 Bytes.push_back(-1);
5313}
5314
5315// Add an extra element to the shuffle, taking it from element Elem of Op.
5316// A null Op indicates a vector input whose value will be calculated later;
5317// there is at most one such input per shuffle and it always has the same
5318// type as the result. Aborts and returns false if the source vector elements
5319// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
5320// LLVM they become implicitly extended, but this is rare and not optimized.
5321bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5322 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5323
5324 // The source vector can have wider elements than the result,
5325 // either through an explicit TRUNCATE or because of type legalization.
5326 // We want the least significant part.
5327 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5328 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5329
5330 // Return false if the source elements are smaller than their destination
5331 // elements.
5332 if (FromBytesPerElement < BytesPerElement)
5333 return false;
5334
5335 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5336 (FromBytesPerElement - BytesPerElement));
5337
5338 // Look through things like shuffles and bitcasts.
5339 while (Op.getNode()) {
5340 if (Op.getOpcode() == ISD::BITCAST)
5341 Op = Op.getOperand(0);
5342 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5343 // See whether the bytes we need come from a contiguous part of one
5344 // operand.
5345 SmallVector<int, SystemZ::VectorBytes> OpBytes;
5346 if (!getVPermMask(Op, OpBytes))
5347 break;
5348 int NewByte;
5349 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5350 break;
5351 if (NewByte < 0) {
5352 addUndef();
5353 return true;
5354 }
5355 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
5356 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
5357 } else if (Op.isUndef()) {
5358 addUndef();
5359 return true;
5360 } else
5361 break;
5362 }
5363
5364 // Make sure that the source of the extraction is in Ops.
5365 unsigned OpNo = 0;
5366 for (; OpNo < Ops.size(); ++OpNo)
5367 if (Ops[OpNo] == Op)
5368 break;
5369 if (OpNo == Ops.size())
5370 Ops.push_back(Op);
5371
5372 // Add the element to Bytes.
5373 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
5374 for (unsigned I = 0; I < BytesPerElement; ++I)
5375 Bytes.push_back(Base + I);
5376
5377 return true;
5378}
5379
5380// Return SDNodes for the completed shuffle.
5381SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
5382 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
5383
5384 if (Ops.size() == 0)
5385 return DAG.getUNDEF(VT);
5386
5387 // Use a single unpack if possible as the last operation.
5388 tryPrepareForUnpack();
5389
5390 // Make sure that there are at least two shuffle operands.
5391 if (Ops.size() == 1)
5392 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
5393
5394 // Create a tree of shuffles, deferring root node until after the loop.
5395 // Try to redistribute the undefined elements of non-root nodes so that
5396 // the non-root shuffles match something like a pack or merge, then adjust
5397 // the parent node's permute vector to compensate for the new order.
5398 // Among other things, this copes with vectors like <2 x i16> that were
5399 // padded with undefined elements during type legalization.
5400 //
5401 // In the best case this redistribution will lead to the whole tree
5402 // using packs and merges. It should rarely be a loss in other cases.
5403 unsigned Stride = 1;
5404 for (; Stride * 2 < Ops.size(); Stride *= 2) {
5405 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
5406 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
5407
5408 // Create a mask for just these two operands.
5409 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
5410 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5411 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
5412 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
5413 if (OpNo == I)
5414 NewBytes[J] = Byte;
5415 else if (OpNo == I + Stride)
5416 NewBytes[J] = SystemZ::VectorBytes + Byte;
5417 else
5418 NewBytes[J] = -1;
5419 }
5420 // See if it would be better to reorganize NewBytes to avoid using VPERM.
5421 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
5422 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
5423 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
5424 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
5425 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5426 if (NewBytes[J] >= 0) {
5427 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
5428 "Invalid double permute");
5429 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
5430 } else
5431 assert(NewBytesMap[J] < 0 && "Invalid double permute");
5432 }
5433 } else {
5434 // Just use NewBytes on the operands.
5435 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
5436 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
5437 if (NewBytes[J] >= 0)
5438 Bytes[J] = I * SystemZ::VectorBytes + J;
5439 }
5440 }
5441 }
5442
5443 // Now we just have 2 inputs. Put the second operand in Ops[1].
5444 if (Stride > 1) {
5445 Ops[1] = Ops[Stride];
5446 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5447 if (Bytes[I] >= int(SystemZ::VectorBytes))
5448 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
5449 }
5450
5451 // Look for an instruction that can do the permute without resorting
5452 // to VPERM.
5453 unsigned OpNo0, OpNo1;
5454 SDValue Op;
5455 if (unpackWasPrepared() && Ops[1].isUndef())
5456 Op = Ops[0];
5457 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
5458 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
5459 else
5460 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
5461
5462 Op = insertUnpackIfPrepared(DAG, DL, Op);
5463
5464 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5465}
5466
5467#ifndef NDEBUG
5468static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
5469 dbgs() << Msg.c_str() << " { ";
5470 for (unsigned i = 0; i < Bytes.size(); i++)
5471 dbgs() << Bytes[i] << " ";
5472 dbgs() << "}\n";
5473}
5474#endif
5475
5476// If the Bytes vector matches an unpack operation, prepare to do the unpack
5477// after all else by removing the zero vector and the effect of the unpack on
5478// Bytes.
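// Illustrative example (added): if Ops[1] is the zero vector and Bytes is
//   { 16,0, 16,1, 16,2, 16,3, 16,4, 16,5, 16,6, 16,7 }
// (a byte-wise zero extension of the first eight bytes of Ops[0]), the zero
// vector is dropped, Bytes becomes { 0,1,2,3,4,5,6,7, -1,... }, and the zero
// extension is re-created later by insertUnpackIfPrepared as an UNPACKL_HIGH.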
5479void GeneralShuffle::tryPrepareForUnpack() {
5480 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
5481 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
5482 return;
5483
5484 // Only do this if removing the zero vector reduces the depth, otherwise
5485 // the critical path will increase with the final unpack.
5486 if (Ops.size() > 2 &&
5487 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
5488 return;
5489
5490 // Find an unpack that would allow removing the zero vector from Ops.
5491 UnpackFromEltSize = 1;
5492 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
5493 bool MatchUnpack = true;
5494 SmallVector<int, 16> SrcBytes;
5495 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
5496 unsigned ToEltSize = UnpackFromEltSize * 2;
5497 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
5498 if (!IsZextByte)
5499 SrcBytes.push_back(Bytes[Elt]);
5500 if (Bytes[Elt] != -1) {
5501 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
5502 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
5503 MatchUnpack = false;
5504 break;
5505 }
5506 }
5507 }
5508 if (MatchUnpack) {
5509 if (Ops.size() == 2) {
5510 // Don't use unpack if a single source operand needs rearrangement.
5511 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
5512 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
5513 UnpackFromEltSize = UINT_MAX;
5514 return;
5515 }
5516 }
5517 break;
5518 }
5519 }
5520 if (UnpackFromEltSize > 4)
5521 return;
5522
5523 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
5524 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
5525 << ".\n";
5526 dumpBytes(Bytes, "Original Bytes vector:"););
5527
5528 // Apply the unpack in reverse to the Bytes array.
5529 unsigned B = 0;
5530 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
5531 Elt += UnpackFromEltSize;
5532 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
5533 Bytes[B] = Bytes[Elt];
5534 }
5535 while (B < SystemZ::VectorBytes)
5536 Bytes[B++] = -1;
5537
5538 // Remove the zero vector from Ops
5539 Ops.erase(&Ops[ZeroVecOpNo]);
5540 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5541 if (Bytes[I] >= 0) {
5542 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5543 if (OpNo > ZeroVecOpNo)
5544 Bytes[I] -= SystemZ::VectorBytes;
5545 }
5546
5547 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
5548 dbgs() << "\n";);
5549}
5550
5551SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
5552 const SDLoc &DL,
5553 SDValue Op) {
5554 if (!unpackWasPrepared())
5555 return Op;
5556 unsigned InBits = UnpackFromEltSize * 8;
5557 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
5558 SystemZ::VectorBits / InBits);
5559 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
5560 unsigned OutBits = InBits * 2;
5561 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
5562 SystemZ::VectorBits / OutBits);
5563 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
5564}
5565
5566// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
5567static bool isScalarToVector(SDValue Op) {
5568 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
5569 if (!Op.getOperand(I).isUndef())
5570 return false;
5571 return true;
5572}
5573
5574// Return a vector of type VT that contains Value in the first element.
5575// The other elements don't matter.
5576static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5577 SDValue Value) {
5578 // If we have a constant, replicate it to all elements and let the
5579 // BUILD_VECTOR lowering take care of it.
5580 if (Value.getOpcode() == ISD::Constant ||
5581 Value.getOpcode() == ISD::ConstantFP) {
5582 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
5583 return DAG.getBuildVector(VT, DL, Ops);
5584 }
5585 if (Value.isUndef())
5586 return DAG.getUNDEF(VT);
5587 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
5588}
5589
5590// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
5591// element 1. Used for cases in which replication is cheap.
5592static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5593 SDValue Op0, SDValue Op1) {
5594 if (Op0.isUndef()) {
5595 if (Op1.isUndef())
5596 return DAG.getUNDEF(VT);
5597 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
5598 }
5599 if (Op1.isUndef())
5600 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
5601 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
5602 buildScalarToVector(DAG, DL, VT, Op0),
5603 buildScalarToVector(DAG, DL, VT, Op1));
5604}
5605
5606// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
5607// vector for them.
5608static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
5609 SDValue Op1) {
5610 if (Op0.isUndef() && Op1.isUndef())
5611 return DAG.getUNDEF(MVT::v2i64);
5612 // If one of the two inputs is undefined then replicate the other one,
5613 // in order to avoid using another register unnecessarily.
5614 if (Op0.isUndef())
5615 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5616 else if (Op1.isUndef())
5617 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5618 else {
5619 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5620 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5621 }
5622 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
5623}
5624
5625// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
5626// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
5627// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
5628// would benefit from this representation and return it if so.
5629static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
5630 BuildVectorSDNode *BVN) {
5631 EVT VT = BVN->getValueType(0);
5632 unsigned NumElements = VT.getVectorNumElements();
5633
5634 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
5635 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
5636 // need a BUILD_VECTOR, add an additional placeholder operand for that
5637 // BUILD_VECTOR and store its operands in ResidueOps.
5638 GeneralShuffle GS(VT);
5639 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
5640 bool FoundOne = false;
5641 for (unsigned I = 0; I < NumElements; ++I) {
5642 SDValue Op = BVN->getOperand(I);
5643 if (Op.getOpcode() == ISD::TRUNCATE)
5644 Op = Op.getOperand(0);
5645 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5646 Op.getOperand(1).getOpcode() == ISD::Constant) {
5647 unsigned Elem = Op.getConstantOperandVal(1);
5648 if (!GS.add(Op.getOperand(0), Elem))
5649 return SDValue();
5650 FoundOne = true;
5651 } else if (Op.isUndef()) {
5652 GS.addUndef();
5653 } else {
5654 if (!GS.add(SDValue(), ResidueOps.size()))
5655 return SDValue();
5656 ResidueOps.push_back(BVN->getOperand(I));
5657 }
5658 }
5659
5660 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
5661 if (!FoundOne)
5662 return SDValue();
5663
5664 // Create the BUILD_VECTOR for the remaining elements, if any.
5665 if (!ResidueOps.empty()) {
5666 while (ResidueOps.size() < NumElements)
5667 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
5668 for (auto &Op : GS.Ops) {
5669 if (!Op.getNode()) {
5670 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
5671 break;
5672 }
5673 }
5674 }
5675 return GS.getNode(DAG, SDLoc(BVN));
5676}
5677
5678bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
5679 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
5680 return true;
5681 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
5682 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
5683 return true;
5684 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
5685 return true;
5686 return false;
5687}
5688
5689// Combine GPR scalar values Elems into a vector of type VT.
5690SDValue
5691SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5692 SmallVectorImpl<SDValue> &Elems) const {
5693 // See whether there is a single replicated value.
5694 SDValue Single;
5695 unsigned int NumElements = Elems.size();
5696 unsigned int Count = 0;
5697 for (auto Elem : Elems) {
5698 if (!Elem.isUndef()) {
5699 if (!Single.getNode())
5700 Single = Elem;
5701 else if (Elem != Single) {
5702 Single = SDValue();
5703 break;
5704 }
5705 Count += 1;
5706 }
5707 }
5708 // There are three cases here:
5709 //
5710 // - if the only defined element is a loaded one, the best sequence
5711 // is a replicating load.
5712 //
5713 // - otherwise, if the only defined element is an i64 value, we will
5714 // end up with the same VLVGP sequence regardless of whether we short-cut
5715 // for replication or fall through to the later code.
5716 //
5717 // - otherwise, if the only defined element is an i32 or smaller value,
5718 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
5719 // This is only a win if the single defined element is used more than once.
5720 // In other cases we're better off using a single VLVGx.
5721 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
5722 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
5723
5724 // If all elements are loads, use VLREP/VLEs (below).
5725 bool AllLoads = true;
5726 for (auto Elem : Elems)
5727 if (!isVectorElementLoad(Elem)) {
5728 AllLoads = false;
5729 break;
5730 }
5731
5732 // The best way of building a v2i64 from two i64s is to use VLVGP.
5733 if (VT == MVT::v2i64 && !AllLoads)
5734 return joinDwords(DAG, DL, Elems[0], Elems[1]);
5735
5736 // Use a 64-bit merge high to combine two doubles.
5737 if (VT == MVT::v2f64 && !AllLoads)
5738 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5739
5740 // Build v4f32 values directly from the FPRs:
5741 //
5742 // <Axxx> <Bxxx> <Cxxx> <Dxxx>
5743 // V V VMRHF
5744 // <ABxx> <CDxx>
5745 // V VMRHG
5746 // <ABCD>
5747 if (VT == MVT::v4f32 && !AllLoads) {
5748 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5749 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
5750 // Avoid unnecessary undefs by reusing the other operand.
5751 if (Op01.isUndef())
5752 Op01 = Op23;
5753 else if (Op23.isUndef())
5754 Op23 = Op01;
5755 // Merging identical replications is a no-op.
5756 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
5757 return Op01;
5758 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
5759 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
5760 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
5761 DL, MVT::v2i64, Op01, Op23);
5762 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5763 }
5764
5765 // Collect the constant terms.
5766 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
5767 SmallVector<bool, 16> Done(NumElements, false);
5768
5769 unsigned NumConstants = 0;
5770 for (unsigned I = 0; I < NumElements; ++I) {
5771 SDValue Elem = Elems[I];
5772 if (Elem.getOpcode() == ISD::Constant ||
5773 Elem.getOpcode() == ISD::ConstantFP) {
5774 NumConstants += 1;
5775 Constants[I] = Elem;
5776 Done[I] = true;
5777 }
5778 }
5779 // If there was at least one constant, fill in the other elements of
5780 // Constants with undefs to get a full vector constant and use that
5781 // as the starting point.
5782 SDValue Result;
5783 SDValue ReplicatedVal;
5784 if (NumConstants > 0) {
5785 for (unsigned I = 0; I < NumElements; ++I)
5786 if (!Constants[I].getNode())
5787 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
5788 Result = DAG.getBuildVector(VT, DL, Constants);
5789 } else {
5790 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
5791 // avoid a false dependency on any previous contents of the vector
5792 // register.
5793
5794 // Use a VLREP if at least one element is a load. Make sure to replicate
5795 // the load with the most elements having its value.
5796 std::map<const SDNode*, unsigned> UseCounts;
5797 SDNode *LoadMaxUses = nullptr;
5798 for (unsigned I = 0; I < NumElements; ++I)
5799 if (isVectorElementLoad(Elems[I])) {
5800 SDNode *Ld = Elems[I].getNode();
5801 UseCounts[Ld]++;
5802 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
5803 LoadMaxUses = Ld;
5804 }
5805 if (LoadMaxUses != nullptr) {
5806 ReplicatedVal = SDValue(LoadMaxUses, 0);
5807 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
5808 } else {
5809 // Try to use VLVGP.
5810 unsigned I1 = NumElements / 2 - 1;
5811 unsigned I2 = NumElements - 1;
5812 bool Def1 = !Elems[I1].isUndef();
5813 bool Def2 = !Elems[I2].isUndef();
5814 if (Def1 || Def2) {
5815 SDValue Elem1 = Elems[Def1 ? I1 : I2];
5816 SDValue Elem2 = Elems[Def2 ? I2 : I1];
5817 Result = DAG.getNode(ISD::BITCAST, DL, VT,
5818 joinDwords(DAG, DL, Elem1, Elem2));
5819 Done[I1] = true;
5820 Done[I2] = true;
5821 } else
5822 Result = DAG.getUNDEF(VT);
5823 }
5824 }
5825
5826 // Use VLVGx to insert the other elements.
5827 for (unsigned I = 0; I < NumElements; ++I)
5828 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
5829 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
5830 DAG.getConstant(I, DL, MVT::i32));
5831 return Result;
5832}
5833
5834SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
5835 SelectionDAG &DAG) const {
5836 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
5837 SDLoc DL(Op);
5838 EVT VT = Op.getValueType();
5839
5840 if (BVN->isConstant()) {
5841 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
5842 return Op;
5843
5844 // Fall back to loading it from memory.
5845 return SDValue();
5846 }
5847
5848 // See if we should use shuffles to construct the vector from other vectors.
5849 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
5850 return Res;
5851
5852 // Detect SCALAR_TO_VECTOR conversions.
5853 if (isScalarToVector(Op))
5854 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
5855
5856 // Otherwise use buildVector to build the vector up from GPRs.
5857 unsigned NumElements = Op.getNumOperands();
5858 SmallVector<SDValue, 16> Ops(NumElements);
5859 for (unsigned I = 0; I < NumElements; ++I)
5860 Ops[I] = Op.getOperand(I);
5861 return buildVector(DAG, DL, VT, Ops);
5862}
5863
5864SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
5865 SelectionDAG &DAG) const {
5866 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
5867 SDLoc DL(Op);
5868 EVT VT = Op.getValueType();
5869 unsigned NumElements = VT.getVectorNumElements();
5870
5871 if (VSN->isSplat()) {
5872 SDValue Op0 = Op.getOperand(0);
5873 unsigned Index = VSN->getSplatIndex();
5874 assert(Index < VT.getVectorNumElements() &&
5875 "Splat index should be defined and in first operand");
5876 // See whether the value we're splatting is directly available as a scalar.
5877 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5878 Op0.getOpcode() == ISD::BUILD_VECTOR)
5879 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
5880 // Otherwise keep it as a vector-to-vector operation.
5881 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
5882 DAG.getTargetConstant(Index, DL, MVT::i32));
5883 }
5884
5885 GeneralShuffle GS(VT);
5886 for (unsigned I = 0; I < NumElements; ++I) {
5887 int Elt = VSN->getMaskElt(I);
5888 if (Elt < 0)
5889 GS.addUndef();
5890 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
5891 unsigned(Elt) % NumElements))
5892 return SDValue();
5893 }
5894 return GS.getNode(DAG, SDLoc(VSN));
5895}
5896
5897SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
5898 SelectionDAG &DAG) const {
5899 SDLoc DL(Op);
5900 // Just insert the scalar into element 0 of an undefined vector.
5901 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
5902 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
5903 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
5904}
5905
5906SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
5907 SelectionDAG &DAG) const {
5908 // Handle insertions of floating-point values.
5909 SDLoc DL(Op);
5910 SDValue Op0 = Op.getOperand(0);
5911 SDValue Op1 = Op.getOperand(1);
5912 SDValue Op2 = Op.getOperand(2);
5913 EVT VT = Op.getValueType();
5914
5915 // Insertions into constant indices of a v2f64 can be done using VPDI.
5916 // However, if the inserted value is a bitcast or a constant then it's
5917 // better to use GPRs, as below.
5918 if (VT == MVT::v2f64 &&
5919 Op1.getOpcode() != ISD::BITCAST &&
5920 Op1.getOpcode() != ISD::ConstantFP &&
5921 Op2.getOpcode() == ISD::Constant) {
5922 uint64_t Index = Op2->getAsZExtVal();
5923 unsigned Mask = VT.getVectorNumElements() - 1;
5924 if (Index <= Mask)
5925 return Op;
5926 }
5927
5928 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
5929 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
5930 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
5931 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
5932 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
5933 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
5934 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5935}
5936
5937SDValue
5938SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
5939 SelectionDAG &DAG) const {
5940 // Handle extractions of floating-point values.
5941 SDLoc DL(Op);
5942 SDValue Op0 = Op.getOperand(0);
5943 SDValue Op1 = Op.getOperand(1);
5944 EVT VT = Op.getValueType();
5945 EVT VecVT = Op0.getValueType();
5946
5947 // Extractions of constant indices can be done directly.
5948 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
5949 uint64_t Index = CIndexN->getZExtValue();
5950 unsigned Mask = VecVT.getVectorNumElements() - 1;
5951 if (Index <= Mask)
5952 return Op;
5953 }
5954
5955 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
5956 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
5957 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
5958 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
5959 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
5960 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5961}
5962
5963SDValue SystemZTargetLowering::
5964lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5965 SDValue PackedOp = Op.getOperand(0);
5966 EVT OutVT = Op.getValueType();
5967 EVT InVT = PackedOp.getValueType();
5968 unsigned ToBits = OutVT.getScalarSizeInBits();
5969 unsigned FromBits = InVT.getScalarSizeInBits();
5970 do {
5971 FromBits *= 2;
5972 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
5973 SystemZ::VectorBits / FromBits);
5974 PackedOp =
5975 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
5976 } while (FromBits != ToBits);
5977 return PackedOp;
5978}
5979
5980// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
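// For example, zero-extending v16i8 to v8i16 shuffles the input with a zero
// vector using the mask <16,0, 17,1, 18,2, ...>, pairing a zero byte with
// each source byte in big-endian element order (illustrative).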
5981SDValue SystemZTargetLowering::
5982lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5983 SDValue PackedOp = Op.getOperand(0);
5984 SDLoc DL(Op);
5985 EVT OutVT = Op.getValueType();
5986 EVT InVT = PackedOp.getValueType();
5987 unsigned InNumElts = InVT.getVectorNumElements();
5988 unsigned OutNumElts = OutVT.getVectorNumElements();
5989 unsigned NumInPerOut = InNumElts / OutNumElts;
5990
5991 SDValue ZeroVec =
5992 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
5993
5994 SmallVector<int, 16> Mask(InNumElts);
5995 unsigned ZeroVecElt = InNumElts;
5996 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
5997 unsigned MaskElt = PackedElt * NumInPerOut;
5998 unsigned End = MaskElt + NumInPerOut - 1;
5999 for (; MaskElt < End; MaskElt++)
6000 Mask[MaskElt] = ZeroVecElt++;
6001 Mask[MaskElt] = PackedElt;
6002 }
6003 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6004 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6005}
6006
6007SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6008 unsigned ByScalar) const {
6009 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6010 SDValue Op0 = Op.getOperand(0);
6011 SDValue Op1 = Op.getOperand(1);
6012 SDLoc DL(Op);
6013 EVT VT = Op.getValueType();
6014 unsigned ElemBitSize = VT.getScalarSizeInBits();
6015
6016 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6017 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6018 APInt SplatBits, SplatUndef;
6019 unsigned SplatBitSize;
6020 bool HasAnyUndefs;
6021 // Check for constant splats. Use ElemBitSize as the minimum element
6022 // width and reject splats that need wider elements.
6023 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6024 ElemBitSize, true) &&
6025 SplatBitSize == ElemBitSize) {
6026 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6027 DL, MVT::i32);
6028 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6029 }
6030 // Check for variable splats.
6031 BitVector UndefElements;
6032 SDValue Splat = BVN->getSplatValue(&UndefElements);
6033 if (Splat) {
6034 // Since i32 is the smallest legal type, we either need a no-op
6035 // or a truncation.
6036 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6037 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6038 }
6039 }
6040
6041 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6042 // and the shift amount is directly available in a GPR.
6043 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6044 if (VSN->isSplat()) {
6045 SDValue VSNOp0 = VSN->getOperand(0);
6046 unsigned Index = VSN->getSplatIndex();
6047 assert(Index < VT.getVectorNumElements() &&
6048 "Splat index should be defined and in first operand");
6049 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6050 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6051 // Since i32 is the smallest legal type, we either need a no-op
6052 // or a truncation.
6053 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6054 VSNOp0.getOperand(Index));
6055 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6056 }
6057 }
6058 }
6059
6060 // Otherwise just treat the current form as legal.
6061 return Op;
6062}
6063
6064SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
6065 SelectionDAG &DAG) const {
6066 SDLoc DL(Op);
6067 MVT ResultVT = Op.getSimpleValueType();
6068 SDValue Arg = Op.getOperand(0);
6069 unsigned Check = Op.getConstantOperandVal(1);
6070
6071 unsigned TDCMask = 0;
6072 if (Check & fcSNan)
6073 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
6074 if (Check & fcQNan)
6075 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
6076 if (Check & fcPosInf)
6077 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
6078 if (Check & fcNegInf)
6079 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
6080 if (Check & fcPosNormal)
6081 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
6082 if (Check & fcNegNormal)
6083 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
6084 if (Check & fcPosSubnormal)
6085 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
6086 if (Check & fcNegSubnormal)
6087 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
6088 if (Check & fcPosZero)
6089 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
6090 if (Check & fcNegZero)
6091 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
6092 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
6093
6094 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
6095 return getCCResult(DAG, Intr);
6096}
6097
6098SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
6099 SelectionDAG &DAG) const {
6100 SDLoc DL(Op);
6101 SDValue Chain = Op.getOperand(0);
6102
6103 // STCKF only supports a memory operand, so we have to use a temporary.
6104 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
6105 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6106 MachinePointerInfo MPI =
6107 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
6108
6109 // Use STCKF to store the TOD clock into the temporary.
6110 SDValue StoreOps[] = {Chain, StackPtr};
6111 Chain = DAG.getMemIntrinsicNode(
6112 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
6113 MPI, MaybeAlign(), MachineMemOperand::MOStore);
6114
6115 // And read it back from there.
6116 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
6117}
6118
6119SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
6120 SelectionDAG &DAG) const {
6121 switch (Op.getOpcode()) {
6122 case ISD::FRAMEADDR:
6123 return lowerFRAMEADDR(Op, DAG);
6124 case ISD::RETURNADDR:
6125 return lowerRETURNADDR(Op, DAG);
6126 case ISD::BR_CC:
6127 return lowerBR_CC(Op, DAG);
6128 case ISD::SELECT_CC:
6129 return lowerSELECT_CC(Op, DAG);
6130 case ISD::SETCC:
6131 return lowerSETCC(Op, DAG);
6132 case ISD::STRICT_FSETCC:
6133 return lowerSTRICT_FSETCC(Op, DAG, false);
6134 case ISD::STRICT_FSETCCS:
6135 return lowerSTRICT_FSETCC(Op, DAG, true);
6136 case ISD::GlobalAddress:
6137 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
6138 case ISD::GlobalTLSAddress:
6139 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
6140 case ISD::BlockAddress:
6141 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
6142 case ISD::JumpTable:
6143 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
6144 case ISD::ConstantPool:
6145 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
6146 case ISD::BITCAST:
6147 return lowerBITCAST(Op, DAG);
6148 case ISD::VASTART:
6149 return lowerVASTART(Op, DAG);
6150 case ISD::VACOPY:
6151 return lowerVACOPY(Op, DAG);
6152 case ISD::DYNAMIC_STACKALLOC:
6153 return lowerDYNAMIC_STACKALLOC(Op, DAG);
6154 case ISD::GET_DYNAMIC_AREA_OFFSET:
6155 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
6156 case ISD::SMUL_LOHI:
6157 return lowerSMUL_LOHI(Op, DAG);
6158 case ISD::UMUL_LOHI:
6159 return lowerUMUL_LOHI(Op, DAG);
6160 case ISD::SDIVREM:
6161 return lowerSDIVREM(Op, DAG);
6162 case ISD::UDIVREM:
6163 return lowerUDIVREM(Op, DAG);
6164 case ISD::SADDO:
6165 case ISD::SSUBO:
6166 case ISD::UADDO:
6167 case ISD::USUBO:
6168 return lowerXALUO(Op, DAG);
6169 case ISD::UADDO_CARRY:
6170 case ISD::USUBO_CARRY:
6171 return lowerUADDSUBO_CARRY(Op, DAG);
6172 case ISD::OR:
6173 return lowerOR(Op, DAG);
6174 case ISD::CTPOP:
6175 return lowerCTPOP(Op, DAG);
6176 case ISD::VECREDUCE_ADD:
6177 return lowerVECREDUCE_ADD(Op, DAG);
6178 case ISD::ATOMIC_FENCE:
6179 return lowerATOMIC_FENCE(Op, DAG);
6180 case ISD::ATOMIC_SWAP:
6181 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
6182 case ISD::ATOMIC_STORE:
6183 case ISD::ATOMIC_LOAD:
6184 return lowerATOMIC_LDST_I128(Op, DAG);
6185 case ISD::ATOMIC_LOAD_ADD:
6186 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
6187 case ISD::ATOMIC_LOAD_SUB:
6188 return lowerATOMIC_LOAD_SUB(Op, DAG);
6189 case ISD::ATOMIC_LOAD_AND:
6190 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
6191 case ISD::ATOMIC_LOAD_OR:
6192 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
6193 case ISD::ATOMIC_LOAD_XOR:
6194 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
6195 case ISD::ATOMIC_LOAD_NAND:
6196 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
6197 case ISD::ATOMIC_LOAD_MIN:
6198 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
6199 case ISD::ATOMIC_LOAD_MAX:
6200 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
6201 case ISD::ATOMIC_LOAD_UMIN:
6202 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
6203 case ISD::ATOMIC_LOAD_UMAX:
6204 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
6205 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
6206 return lowerATOMIC_CMP_SWAP(Op, DAG);
6207 case ISD::STACKSAVE:
6208 return lowerSTACKSAVE(Op, DAG);
6209 case ISD::STACKRESTORE:
6210 return lowerSTACKRESTORE(Op, DAG);
6211 case ISD::PREFETCH:
6212 return lowerPREFETCH(Op, DAG);
6213 case ISD::INTRINSIC_W_CHAIN:
6214 return lowerINTRINSIC_W_CHAIN(Op, DAG);
6215 case ISD::INTRINSIC_WO_CHAIN:
6216 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
6217 case ISD::BUILD_VECTOR:
6218 return lowerBUILD_VECTOR(Op, DAG);
6219 case ISD::VECTOR_SHUFFLE:
6220 return lowerVECTOR_SHUFFLE(Op, DAG);
6221 case ISD::SCALAR_TO_VECTOR:
6222 return lowerSCALAR_TO_VECTOR(Op, DAG);
6223 case ISD::INSERT_VECTOR_ELT:
6224 return lowerINSERT_VECTOR_ELT(Op, DAG);
6225 case ISD::EXTRACT_VECTOR_ELT:
6226 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6227 case ISD::SIGN_EXTEND_VECTOR_INREG:
6228 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
6229 case ISD::ZERO_EXTEND_VECTOR_INREG:
6230 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
6231 case ISD::SHL:
6232 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
6233 case ISD::SRL:
6234 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
6235 case ISD::SRA:
6236 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
6237 case ISD::ROTL:
6238 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
6239 case ISD::IS_FPCLASS:
6240 return lowerIS_FPCLASS(Op, DAG);
6241 case ISD::GET_ROUNDING:
6242 return lowerGET_ROUNDING(Op, DAG);
6243 case ISD::READCYCLECOUNTER:
6244 return lowerREADCYCLECOUNTER(Op, DAG);
6245 default:
6246 llvm_unreachable("Unexpected node to lower");
6247 }
6248}
6249
6250// Lower operations with invalid operand or result types (currently used
6251// only for 128-bit integer types).
6252void
6253SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
6254 SmallVectorImpl<SDValue> &Results,
6255 SelectionDAG &DAG) const {
6256 switch (N->getOpcode()) {
6257 case ISD::ATOMIC_LOAD: {
6258 SDLoc DL(N);
6259 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
6260 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
6261 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6262 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
6263 DL, Tys, Ops, MVT::i128, MMO);
6264 Results.push_back(lowerGR128ToI128(DAG, Res));
6265 Results.push_back(Res.getValue(1));
6266 break;
6267 }
6268 case ISD::ATOMIC_STORE: {
6269 SDLoc DL(N);
6270 SDVTList Tys = DAG.getVTList(MVT::Other);
6271 SDValue Ops[] = {N->getOperand(0), lowerI128ToGR128(DAG, N->getOperand(1)),
6272 N->getOperand(2)};
6273 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6274 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
6275 DL, Tys, Ops, MVT::i128, MMO);
6276 // We have to enforce sequential consistency by performing a
6277 // serialization operation after the store.
6278 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
6279 AtomicOrdering::SequentiallyConsistent)
6280 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
6281 MVT::Other, Res), 0);
6282 Results.push_back(Res);
6283 break;
6284 }
6285 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
6286 SDLoc DL(N);
6287 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
6288 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
6289 lowerI128ToGR128(DAG, N->getOperand(2)),
6290 lowerI128ToGR128(DAG, N->getOperand(3)) };
6291 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6292 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
6293 DL, Tys, Ops, MVT::i128, MMO);
6294 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
6295 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
6296 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
6297 Results.push_back(lowerGR128ToI128(DAG, Res));
6298 Results.push_back(Success);
6299 Results.push_back(Res.getValue(2));
6300 break;
6301 }
6302 case ISD::BITCAST: {
6303 SDValue Src = N->getOperand(0);
6304 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
6305 !useSoftFloat()) {
6306 SDLoc DL(N);
6307 SDValue Lo, Hi;
6308 if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) {
6309 SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src);
6310 Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
6311 DAG.getConstant(1, DL, MVT::i32));
6312 Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
6313 DAG.getConstant(0, DL, MVT::i32));
6314 } else {
6315 assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass &&
6316 "Unrecognized register class for f128.");
6317 SDValue LoFP = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
6318 DL, MVT::f64, Src);
6319 SDValue HiFP = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
6320 DL, MVT::f64, Src);
6321 Lo = DAG.getNode(ISD::BITCAST, DL, MVT::i64, LoFP);
6322 Hi = DAG.getNode(ISD::BITCAST, DL, MVT::i64, HiFP);
6323 }
6324 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi));
6325 }
6326 break;
6327 }
6328 default:
6329 llvm_unreachable("Unexpected node to lower");
6330 }
6331}
6332
6333void
6334SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
6335 SmallVectorImpl<SDValue> &Results,
6336 SelectionDAG &DAG) const {
6337 return LowerOperationWrapper(N, Results, DAG);
6338}
6339
6340const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
6341#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
6342 switch ((SystemZISD::NodeType)Opcode) {
6343 case SystemZISD::FIRST_NUMBER: break;
6344 OPCODE(RET_GLUE);
6345 OPCODE(CALL);
6346 OPCODE(SIBCALL);
6347 OPCODE(TLS_GDCALL);
6348 OPCODE(TLS_LDCALL);
6349 OPCODE(PCREL_WRAPPER);
6350 OPCODE(PCREL_OFFSET);
6351 OPCODE(ICMP);
6352 OPCODE(FCMP);
6353 OPCODE(STRICT_FCMP);
6354 OPCODE(STRICT_FCMPS);
6355 OPCODE(TM);
6356 OPCODE(BR_CCMASK);
6357 OPCODE(SELECT_CCMASK);
6358 OPCODE(ADJDYNALLOC);
6359 OPCODE(PROBED_ALLOCA);
6360 OPCODE(POPCNT);
6361 OPCODE(SMUL_LOHI);
6362 OPCODE(UMUL_LOHI);
6363 OPCODE(SDIVREM);
6364 OPCODE(UDIVREM);
6365 OPCODE(SADDO);
6366 OPCODE(SSUBO);
6367 OPCODE(UADDO);
6368 OPCODE(USUBO);
6369 OPCODE(ADDCARRY);
6370 OPCODE(SUBCARRY);
6371 OPCODE(GET_CCMASK);
6372 OPCODE(MVC);
6373 OPCODE(NC);
6374 OPCODE(OC);
6375 OPCODE(XC);
6376 OPCODE(CLC);
6377 OPCODE(MEMSET_MVC);
6378 OPCODE(STPCPY);
6379 OPCODE(STRCMP);
6380 OPCODE(SEARCH_STRING);
6381 OPCODE(IPM);
6382 OPCODE(TBEGIN);
6383 OPCODE(TBEGIN_NOFLOAT);
6384 OPCODE(TEND);
6385 OPCODE(BYTE_MASK);
6386 OPCODE(ROTATE_MASK);
6387 OPCODE(REPLICATE);
6388 OPCODE(JOIN_DWORDS);
6389 OPCODE(SPLAT);
6390 OPCODE(MERGE_HIGH);
6391 OPCODE(MERGE_LOW);
6392 OPCODE(SHL_DOUBLE);
6393 OPCODE(PERMUTE_DWORDS);
6394 OPCODE(PERMUTE);
6395 OPCODE(PACK);
6396 OPCODE(PACKS_CC);
6397 OPCODE(PACKLS_CC);
6398 OPCODE(UNPACK_HIGH);
6399 OPCODE(UNPACKL_HIGH);
6400 OPCODE(UNPACK_LOW);
6401 OPCODE(UNPACKL_LOW);
6402 OPCODE(VSHL_BY_SCALAR);
6403 OPCODE(VSRL_BY_SCALAR);
6404 OPCODE(VSRA_BY_SCALAR);
6405 OPCODE(VROTL_BY_SCALAR);
6406 OPCODE(VSUM);
6407 OPCODE(VACC);
6408 OPCODE(VSCBI);
6409 OPCODE(VAC);
6410 OPCODE(VSBI);
6411 OPCODE(VACCC);
6412 OPCODE(VSBCBI);
6413 OPCODE(VICMPE);
6414 OPCODE(VICMPH);
6415 OPCODE(VICMPHL);
6416 OPCODE(VICMPES);
6417 OPCODE(VICMPHS);
6418 OPCODE(VICMPHLS);
6419 OPCODE(VFCMPE);
6420 OPCODE(STRICT_VFCMPE);
6421 OPCODE(STRICT_VFCMPES);
6422 OPCODE(VFCMPH);
6423 OPCODE(STRICT_VFCMPH);
6424 OPCODE(STRICT_VFCMPHS);
6425 OPCODE(VFCMPHE);
6426 OPCODE(STRICT_VFCMPHE);
6427 OPCODE(STRICT_VFCMPHES);
6428 OPCODE(VFCMPES);
6429 OPCODE(VFCMPHS);
6430 OPCODE(VFCMPHES);
6431 OPCODE(VFTCI);
6432 OPCODE(VEXTEND);
6433 OPCODE(STRICT_VEXTEND);
6434 OPCODE(VROUND);
6435 OPCODE(STRICT_VROUND);
6436 OPCODE(VTM);
6437 OPCODE(SCMP128HI);
6438 OPCODE(UCMP128HI);
6439 OPCODE(VFAE_CC);
6440 OPCODE(VFAEZ_CC);
6441 OPCODE(VFEE_CC);
6442 OPCODE(VFEEZ_CC);
6443 OPCODE(VFENE_CC);
6444 OPCODE(VFENEZ_CC);
6445 OPCODE(VISTR_CC);
6446 OPCODE(VSTRC_CC);
6447 OPCODE(VSTRCZ_CC);
6448 OPCODE(VSTRS_CC);
6449 OPCODE(VSTRSZ_CC);
6450 OPCODE(TDC);
6451 OPCODE(ATOMIC_SWAPW);
6452 OPCODE(ATOMIC_LOADW_ADD);
6453 OPCODE(ATOMIC_LOADW_SUB);
6454 OPCODE(ATOMIC_LOADW_AND);
6455 OPCODE(ATOMIC_LOADW_OR);
6456 OPCODE(ATOMIC_LOADW_XOR);
6457 OPCODE(ATOMIC_LOADW_NAND);
6458 OPCODE(ATOMIC_LOADW_MIN);
6459 OPCODE(ATOMIC_LOADW_MAX);
6460 OPCODE(ATOMIC_LOADW_UMIN);
6461 OPCODE(ATOMIC_LOADW_UMAX);
6462 OPCODE(ATOMIC_CMP_SWAPW);
6463 OPCODE(ATOMIC_CMP_SWAP);
6464 OPCODE(ATOMIC_LOAD_128);
6465 OPCODE(ATOMIC_STORE_128);
6466 OPCODE(ATOMIC_CMP_SWAP_128);
6467 OPCODE(LRV);
6468 OPCODE(STRV);
6469 OPCODE(VLER);
6470 OPCODE(VSTER);
6471 OPCODE(STCKF);
6472 OPCODE(PREFETCH);
6473 OPCODE(ADA_ENTRY);
6474 }
6475 return nullptr;
6476#undef OPCODE
6477}
6478
6479// Return true if VT is a vector whose elements are a whole number of bytes
6480// in width. Also check for presence of vector support.
6481bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
6482 if (!Subtarget.hasVector())
6483 return false;
6484
6485 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
6486}
6487
6488// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
6489// producing a result of type ResVT. Op is a possibly bitcast version
6490// of the input vector and Index is the index (based on type VecVT) that
6491// should be extracted. Return the new extraction if a simplification
6492// was possible or if Force is true.
6493SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
6494 EVT VecVT, SDValue Op,
6495 unsigned Index,
6496 DAGCombinerInfo &DCI,
6497 bool Force) const {
6498 SelectionDAG &DAG = DCI.DAG;
6499
6500 // The number of bytes being extracted.
6501 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6502
6503 for (;;) {
6504 unsigned Opcode = Op.getOpcode();
6505 if (Opcode == ISD::BITCAST)
6506 // Look through bitcasts.
6507 Op = Op.getOperand(0);
6508 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
6509 canTreatAsByteVector(Op.getValueType())) {
6510 // Get a VPERM-like permute mask and see whether the bytes covered
6511 // by the extracted element are a contiguous sequence from one
6512 // source operand.
6513 SmallVector<int, SystemZ::VectorBytes> Bytes;
6514 if (!getVPermMask(Op, Bytes))
6515 break;
6516 int First;
6517 if (!getShuffleInput(Bytes, Index * BytesPerElement,
6518 BytesPerElement, First))
6519 break;
6520 if (First < 0)
6521 return DAG.getUNDEF(ResVT);
6522 // Make sure the contiguous sequence starts at a multiple of the
6523 // original element size.
6524 unsigned Byte = unsigned(First) % Bytes.size();
6525 if (Byte % BytesPerElement != 0)
6526 break;
6527 // We can get the extracted value directly from an input.
6528 Index = Byte / BytesPerElement;
6529 Op = Op.getOperand(unsigned(First) / Bytes.size());
6530 Force = true;
6531 } else if (Opcode == ISD::BUILD_VECTOR &&
6532 canTreatAsByteVector(Op.getValueType())) {
6533 // We can only optimize this case if the BUILD_VECTOR elements are
6534 // at least as wide as the extracted value.
6535 EVT OpVT = Op.getValueType();
6536 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6537 if (OpBytesPerElement < BytesPerElement)
6538 break;
6539 // Make sure that the least-significant bit of the extracted value
6540 // is the least significant bit of an input.
6541 unsigned End = (Index + 1) * BytesPerElement;
6542 if (End % OpBytesPerElement != 0)
6543 break;
6544 // We're extracting the low part of one operand of the BUILD_VECTOR.
6545 Op = Op.getOperand(End / OpBytesPerElement - 1);
6546 if (!Op.getValueType().isInteger()) {
6547 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
6548 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
6549 DCI.AddToWorklist(Op.getNode());
6550 }
6551 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
6552 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
6553 if (VT != ResVT) {
6554 DCI.AddToWorklist(Op.getNode());
6555 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
6556 }
6557 return Op;
6558 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6559 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
6560 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6561 canTreatAsByteVector(Op.getValueType()) &&
6562 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
6563 // Make sure that only the unextended bits are significant.
6564 EVT ExtVT = Op.getValueType();
6565 EVT OpVT = Op.getOperand(0).getValueType();
6566 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
6567 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6568 unsigned Byte = Index * BytesPerElement;
6569 unsigned SubByte = Byte % ExtBytesPerElement;
6570 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
6571 if (SubByte < MinSubByte ||
6572 SubByte + BytesPerElement > ExtBytesPerElement)
6573 break;
6574 // Get the byte offset of the unextended element
6575 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
6576 // ...then add the byte offset relative to that element.
6577 Byte += SubByte - MinSubByte;
6578 if (Byte % BytesPerElement != 0)
6579 break;
6580 Op = Op.getOperand(0);
6581 Index = Byte / BytesPerElement;
6582 Force = true;
6583 } else
6584 break;
6585 }
6586 if (Force) {
6587 if (Op.getValueType() != VecVT) {
6588 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
6589 DCI.AddToWorklist(Op.getNode());
6590 }
6591 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
6592 DAG.getConstant(Index, DL, MVT::i32));
6593 }
6594 return SDValue();
6595}
6596
6597// Optimize vector operations in scalar value Op on the basis that Op
6598// is truncated to TruncVT.
6599SDValue SystemZTargetLowering::combineTruncateExtract(
6600 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
6601 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
6602 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
6603 // of type TruncVT.
6604 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6605 TruncVT.getSizeInBits() % 8 == 0) {
6606 SDValue Vec = Op.getOperand(0);
6607 EVT VecVT = Vec.getValueType();
6608 if (canTreatAsByteVector(VecVT)) {
6609 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
6610 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6611 unsigned TruncBytes = TruncVT.getStoreSize();
6612 if (BytesPerElement % TruncBytes == 0) {
6613 // Calculate the value of Y' in the above description. We are
6614 // splitting the original elements into Scale equal-sized pieces
6615 // and for truncation purposes want the last (least-significant)
6616 // of these pieces for IndexN. This is easiest to do by calculating
6617 // the start index of the following element and then subtracting 1.
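// For instance (illustrative), truncating an extract from v2i64 to i16
// gives Scale = 4, so element Y = 1 maps to Y' = (1 + 1) * 4 - 1 = 7 in
// the bitcast v8i16 vector.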
6618 unsigned Scale = BytesPerElement / TruncBytes;
6619 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
6620
6621 // Defer the creation of the bitcast from X to combineExtract,
6622 // which might be able to optimize the extraction.
6623 VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
6624 VecVT.getStoreSize() / TruncBytes);
6625 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
6626 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
6627 }
6628 }
6629 }
6630 }
6631 return SDValue();
6632}
6633
6634SDValue SystemZTargetLowering::combineZERO_EXTEND(
6635 SDNode *N, DAGCombinerInfo &DCI) const {
6636 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
6637 SelectionDAG &DAG = DCI.DAG;
6638 SDValue N0 = N->getOperand(0);
6639 EVT VT = N->getValueType(0);
6640 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
6641 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
6642 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6643 if (TrueOp && FalseOp) {
6644 SDLoc DL(N0);
6645 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
6646 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
6647 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
6648 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
6649 // If N0 has multiple uses, change other uses as well.
6650 if (!N0.hasOneUse()) {
6651 SDValue TruncSelect =
6652 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
6653 DCI.CombineTo(N0.getNode(), TruncSelect);
6654 }
6655 return NewSelect;
6656 }
6657 }
6658 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
6659 // of the result is smaller than the size of X and all the truncated bits
6660 // of X are already zero.
6661 if (N0.getOpcode() == ISD::XOR &&
6662 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
6663 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6664 N0.getOperand(1).getOpcode() == ISD::Constant) {
6665 SDValue X = N0.getOperand(0).getOperand(0);
6666 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
6667 KnownBits Known = DAG.computeKnownBits(X);
6668 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
6669 N0.getValueSizeInBits(),
6670 VT.getSizeInBits());
6671 if (TruncatedBits.isSubsetOf(Known.Zero)) {
6672 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
6673 APInt Mask = N0.getConstantOperandAPInt(1).trunc(VT.getSizeInBits());
6674 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
6675 X, DAG.getConstant(Mask, SDLoc(N0), VT));
6676 }
6677 }
6678 }
6679
6680 return SDValue();
6681}
6682
6683SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
6684 SDNode *N, DAGCombinerInfo &DCI) const {
6685 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
6686 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
6687 // into (select_cc LHS, RHS, -1, 0, COND)
6688 SelectionDAG &DAG = DCI.DAG;
6689 SDValue N0 = N->getOperand(0);
6690 EVT VT = N->getValueType(0);
6691 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
6692 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
6693 N0 = N0.getOperand(0);
6694 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
6695 SDLoc DL(N0);
6696 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
6697 DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
6698 N0.getOperand(2) };
6699 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
6700 }
6701 return SDValue();
6702}
6703
6704SDValue SystemZTargetLowering::combineSIGN_EXTEND(
6705 SDNode *N, DAGCombinerInfo &DCI) const {
6706 // Convert (sext (ashr (shl X, C1), C2)) to
6707 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
6708 // cheap as narrower ones.
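// For example (illustrative), with an i32 X extended to i64:
// (sext (ashr (shl X, 24), 24)) becomes (ashr (shl (anyext X), 56), 56),
// both shift amounts growing by the 32 extra bits.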
6709 SelectionDAG &DAG = DCI.DAG;
6710 SDValue N0 = N->getOperand(0);
6711 EVT VT = N->getValueType(0);
6712 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
6713 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6714 SDValue Inner = N0.getOperand(0);
6715 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
6716 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
6717 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
6718 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
6719 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
6720 EVT ShiftVT = N0.getOperand(1).getValueType();
6721 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
6722 Inner.getOperand(0));
6723 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
6724 DAG.getConstant(NewShlAmt, SDLoc(Inner),
6725 ShiftVT));
6726 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
6727 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
6728 }
6729 }
6730 }
6731
6732 return SDValue();
6733}
6734
6735SDValue SystemZTargetLowering::combineMERGE(
6736 SDNode *N, DAGCombinerInfo &DCI) const {
6737 SelectionDAG &DAG = DCI.DAG;
6738 unsigned Opcode = N->getOpcode();
6739 SDValue Op0 = N->getOperand(0);
6740 SDValue Op1 = N->getOperand(1);
6741 if (Op0.getOpcode() == ISD::BITCAST)
6742 Op0 = Op0.getOperand(0);
6743 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
6744 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
6745 // for v4f32.
6746 if (Op1 == N->getOperand(0))
6747 return Op1;
6748 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
6749 EVT VT = Op1.getValueType();
6750 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
6751 if (ElemBytes <= 4) {
6752 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
6753 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
6754 EVT InVT = VT.changeVectorElementTypeToInteger();
6755 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
6756 SystemZ::VectorBytes / ElemBytes / 2);
6757 if (VT != InVT) {
6758 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
6759 DCI.AddToWorklist(Op1.getNode());
6760 }
6761 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
6762 DCI.AddToWorklist(Op.getNode());
6763 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
6764 }
6765 }
6766 return SDValue();
6767}
6768
6769SDValue SystemZTargetLowering::combineLOAD(
6770 SDNode *N, DAGCombinerInfo &DCI) const {
6771 SelectionDAG &DAG = DCI.DAG;
6772 EVT LdVT = N->getValueType(0);
6773 SDLoc DL(N);
6774
6775 // Replace an i128 load that is used solely to move its value into GPRs
6776 // by separate loads of both halves.
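// For example (illustrative): an i128 load whose only value uses are
//   (trunc i64 (srl %val, 64)) and (trunc i64 %val)
// is rewritten as two i64 loads, the high half from offset 0 and the low
// half from offset 8 (big-endian layout), so the value never has to be
// materialized in a 128-bit register.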
6777 if (LdVT == MVT::i128) {
6778 LoadSDNode *LD = cast<LoadSDNode>(N);
6779 if (!LD->isSimple() || !ISD::isNormalLoad(LD))
6780 return SDValue();
6781
6782 // Scan through all users.
6783 SmallVector<std::pair<SDNode *, int>, 2> Users;
6784 int UsedElements = 0;
6785 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6786 UI != UIEnd; ++UI) {
6787 // Skip the uses of the chain.
6788 if (UI.getUse().getResNo() != 0)
6789 continue;
6790
6791 // Verify every user is a TRUNCATE to i64 of the low or high half ...
6792 SDNode *User = *UI;
6793 int Index = 1;
6794 if (User->getOpcode() == ISD::SRL &&
6795 User->getOperand(1).getOpcode() == ISD::Constant &&
6796 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
6797 User = *User->use_begin();
6798 Index = 0;
6799 }
6800 if (User->getOpcode() != ISD::TRUNCATE ||
6801 User->getValueType(0) != MVT::i64)
6802 return SDValue();
6803
6804 // ... and no half is extracted twice.
6805 if (UsedElements & (1 << Index))
6806 return SDValue();
6807
6808 UsedElements |= 1 << Index;
6809 Users.push_back(std::make_pair(User, Index));
6810 }
6811
6812 // Rewrite each extraction as an independent load.
6813 SmallVector<SDValue, 2> ArgChains;
6814 for (auto UserAndIndex : Users) {
6815 SDNode *User = UserAndIndex.first;
6816 unsigned Offset = User->getValueType(0).getStoreSize() * UserAndIndex.second;
6817 SDValue Ptr =
6818 DAG.getMemBasePlusOffset(LD->getBasePtr(), TypeSize::getFixed(Offset), DL);
6819 SDValue EltLoad =
6820 DAG.getLoad(User->getValueType(0), DL, LD->getChain(), Ptr,
6821 LD->getPointerInfo().getWithOffset(Offset),
6822 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
6823 LD->getAAInfo());
6824
6825 DCI.CombineTo(User, EltLoad, true);
6826 ArgChains.push_back(EltLoad.getValue(1));
6827 }
6828
6829 // Collect all chains via TokenFactor.
6830 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
6831 ArgChains);
6832 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
6833 DCI.AddToWorklist(Chain.getNode());
6834 return SDValue(N, 0);
6835 }
6836
6837 if (LdVT.isVector() || LdVT.isInteger())
6838 return SDValue();
6839 // Transform a scalar load that is REPLICATEd as well as having other
6840 // use(s) to the form where the other use(s) use the first element of the
6841 // REPLICATE instead of the load. Otherwise instruction selection will not
6842 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
6843 // point loads.
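// Sketch (illustrative): if an f64 load feeds both a REPLICATE and a direct
// scalar use, the scalar use is rewritten to
//   (extract_vector_elt (replicate (load ...)), 0)
// so that instruction selection can emit a single VLREP.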
6844
6845 SDValue Replicate;
6846 SmallVector<SDNode*, 8> OtherUses;
6847 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6848 UI != UE; ++UI) {
6849 if (UI->getOpcode() == SystemZISD::REPLICATE) {
6850 if (Replicate)
6851 return SDValue(); // Should never happen
6852 Replicate = SDValue(*UI, 0);
6853 }
6854 else if (UI.getUse().getResNo() == 0)
6855 OtherUses.push_back(*UI);
6856 }
6857 if (!Replicate || OtherUses.empty())
6858 return SDValue();
6859
6860 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
6861 Replicate, DAG.getConstant(0, DL, MVT::i32));
6862 // Update uses of the loaded Value while preserving old chains.
6863 for (SDNode *U : OtherUses) {
6864 SmallVector<SDValue, 8> Ops;
6865 for (SDValue Op : U->ops())
6866 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
6867 DAG.UpdateNodeOperands(U, Ops);
6868 }
6869 return SDValue(N, 0);
6870}
6871
6872bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
6873 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
6874 return true;
6875 if (Subtarget.hasVectorEnhancements2())
6876 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
6877 return true;
6878 return false;
6879}
6880
6881static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
6882 if (!VT.isVector() || !VT.isSimple() ||
6883 VT.getSizeInBits() != 128 ||
6884 VT.getScalarSizeInBits() % 8 != 0)
6885 return false;
6886
6887 unsigned NumElts = VT.getVectorNumElements();
6888 for (unsigned i = 0; i < NumElts; ++i) {
6889 if (M[i] < 0) continue; // ignore UNDEF indices
6890 if ((unsigned) M[i] != NumElts - 1 - i)
6891 return false;
6892 }
6893
6894 return true;
6895}
6896
6897static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
6898 for (auto *U : StoredVal->uses()) {
6899 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
6900 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
6901 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
6902 continue;
6903 } else if (isa<BuildVectorSDNode>(U)) {
6904 SDValue BuildVector = SDValue(U, 0);
6905 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
6906 isOnlyUsedByStores(BuildVector, DAG))
6907 continue;
6908 }
6909 return false;
6910 }
6911 return true;
6912}
6913
6914static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) {
6915 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
6916 return false;
6917
6918 SDValue Op0 = Val.getOperand(0);
6919 SDValue Op1 = Val.getOperand(1);
6920
6921 if (Op0.getOpcode() == ISD::SHL)
6922 std::swap(Op0, Op1);
6923 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
6924 Op1.getOperand(1).getOpcode() != ISD::Constant ||
6925 Op1.getConstantOperandVal(1) != 64)
6926 return false;
6927 Op1 = Op1.getOperand(0);
6928
6929 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
6930 Op0.getOperand(0).getValueType() != MVT::i64)
6931 return false;
6932 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
6933 Op1.getOperand(0).getValueType() != MVT::i64)
6934 return false;
6935
6936 LoPart = Op0.getOperand(0);
6937 HiPart = Op1.getOperand(0);
6938 return true;
6939}
6940
6941SDValue SystemZTargetLowering::combineSTORE(
6942 SDNode *N, DAGCombinerInfo &DCI) const {
6943 SelectionDAG &DAG = DCI.DAG;
6944 auto *SN = cast<StoreSDNode>(N);
6945 auto &Op1 = N->getOperand(1);
6946 EVT MemVT = SN->getMemoryVT();
6947 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
6948 // for the extraction to be done on a vMiN value, so that we can use VSTE.
6949 // If X has wider elements then convert it to:
6950 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
6951 if (MemVT.isInteger() && SN->isTruncatingStore()) {
6952 if (SDValue Value =
6953 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
6954 DCI.AddToWorklist(Value.getNode());
6955
6956 // Rewrite the store with the new form of stored value.
6957 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
6958 SN->getBasePtr(), SN->getMemoryVT(),
6959 SN->getMemOperand());
6960 }
6961 }
6962 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
6963 if (!SN->isTruncatingStore() &&
6964 Op1.getOpcode() == ISD::BSWAP &&
6965 Op1.getNode()->hasOneUse() &&
6966 canLoadStoreByteSwapped(Op1.getValueType())) {
6967
6968 SDValue BSwapOp = Op1.getOperand(0);
6969
6970 if (BSwapOp.getValueType() == MVT::i16)
6971 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
6972
6973 SDValue Ops[] = {
6974 N->getOperand(0), BSwapOp, N->getOperand(2)
6975 };
6976
6977 return
6978 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
6979 Ops, MemVT, SN->getMemOperand());
6980 }
6981 // Combine STORE (element-swap) into VSTER
6982 if (!SN->isTruncatingStore() &&
6983 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
6984 Op1.getNode()->hasOneUse() &&
6985 Subtarget.hasVectorEnhancements2()) {
6986 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
6987 ArrayRef<int> ShuffleMask = SVN->getMask();
6988 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
6989 SDValue Ops[] = {
6990 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
6991 };
6992
6993 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
6994 DAG.getVTList(MVT::Other),
6995 Ops, MemVT, SN->getMemOperand());
6996 }
6997 }
6998
6999 // Combine STORE (READCYCLECOUNTER) into STCKF.
7000 if (!SN->isTruncatingStore() &&
7001 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
7002 Op1.hasOneUse() &&
7003 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
7004 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
7005 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
7006 DAG.getVTList(MVT::Other),
7007 Ops, MemVT, SN->getMemOperand());
7008 }
7009
7010 // Transform a store of an i128 moved from GPRs into two separate stores.
7011 if (MemVT == MVT::i128 && SN->isSimple() && ISD::isNormalStore(SN)) {
7012 SDValue LoPart, HiPart;
7013 if (isMovedFromParts(Op1, LoPart, HiPart)) {
7014 SDLoc DL(SN);
7015 SDValue Chain0 =
7016 DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),
7017 SN->getPointerInfo(), SN->getOriginalAlign(),
7018 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7019 SDValue Chain1 =
7020 DAG.getStore(SN->getChain(), DL, LoPart,
7021 DAG.getObjectPtrOffset(DL, SN->getBasePtr(),
7022 TypeSize::getFixed(8)),
7023 SN->getPointerInfo().getWithOffset(8),
7024 SN->getOriginalAlign(),
7025 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7026
7027 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
7028 }
7029 }
7030
7031 // Replicate a reg or immediate with VREP instead of scalar multiply or
7032 // immediate load. It seems best to do this during the first DAGCombine as
7033 // it is straightforward to handle the zero-extend node in the initial
7034 // DAG, and also not worry about keeping the new MemVT legal (e.g. when
7035 // extracting an i16 element from a v16i8 vector).
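// Example (illustrative): storing the i64 constant 0x0001000100010001 can be
// done by replicating the i16 word 0x0001 with VREP and storing the
// resulting vector, instead of materializing the full 64-bit immediate.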
7036 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
7037 isOnlyUsedByStores(Op1, DAG)) {
7038 SDValue Word = SDValue();
7039 EVT WordVT;
7040
7041 // Find a replicated immediate and return it if found in Word and its
7042 // type in WordVT.
7043 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
7044 // Some constants are better handled with a scalar store.
7045 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
7046 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
7047 return;
7048 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue()));
7049 if (VCI.isVectorConstantLegal(Subtarget) &&
7050 VCI.Opcode == SystemZISD::REPLICATE) {
7051 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
7052 WordVT = VCI.VecVT.getScalarType();
7053 }
7054 };
7055
7056 // Find a replicated register and return it if found in Word and its type
7057 // in WordVT.
7058 auto FindReplicatedReg = [&](SDValue MulOp) {
7059 EVT MulVT = MulOp.getValueType();
7060 if (MulOp->getOpcode() == ISD::MUL &&
7061 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
7062 // Find a zero extended value and its type.
7063 SDValue LHS = MulOp->getOperand(0);
7064 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
7065 WordVT = LHS->getOperand(0).getValueType();
7066 else if (LHS->getOpcode() == ISD::AssertZext)
7067 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
7068 else
7069 return;
7070 // Find a replicating constant, e.g. 0x00010001.
7071 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
7072 SystemZVectorConstantInfo VCI(
7073 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
7074 if (VCI.isVectorConstantLegal(Subtarget) &&
7075 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
7076 WordVT == VCI.VecVT.getScalarType())
7077 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
7078 }
7079 }
7080 };
7081
7082 if (isa<BuildVectorSDNode>(Op1) &&
7083 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
7084 SDValue SplatVal = Op1->getOperand(0);
7085 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
7086 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
7087 else
7088 FindReplicatedReg(SplatVal);
7089 } else {
7090 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
7091 FindReplicatedImm(C, MemVT.getStoreSize());
7092 else
7093 FindReplicatedReg(Op1);
7094 }
7095
7096 if (Word != SDValue()) {
7097 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
7098 "Bad type handling");
7099 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
7100 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
7101 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
7102 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
7103 SN->getBasePtr(), SN->getMemOperand());
7104 }
7105 }
7106
7107 return SDValue();
7108}
7109
7110SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
7111 SDNode *N, DAGCombinerInfo &DCI) const {
7112 SelectionDAG &DAG = DCI.DAG;
7113 // Combine element-swap (LOAD) into VLER
7114 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7115 N->getOperand(0).hasOneUse() &&
7116 Subtarget.hasVectorEnhancements2()) {
7117 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
7118 ArrayRef<int> ShuffleMask = SVN->getMask();
7119 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
7120 SDValue Load = N->getOperand(0);
7121 LoadSDNode *LD = cast<LoadSDNode>(Load);
7122
7123 // Create the element-swapping load.
7124 SDValue Ops[] = {
7125 LD->getChain(), // Chain
7126 LD->getBasePtr() // Ptr
7127 };
7128 SDValue ESLoad =
7129 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
7130 DAG.getVTList(LD->getValueType(0), MVT::Other),
7131 Ops, LD->getMemoryVT(), LD->getMemOperand());
7132
7133 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
7134 // by the load dead.
7135 DCI.CombineTo(N, ESLoad);
7136
7137 // Next, combine the load away, we give it a bogus result value but a real
7138 // chain result. The result value is dead because the shuffle is dead.
7139 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
7140
7141 // Return N so it doesn't get rechecked!
7142 return SDValue(N, 0);
7143 }
7144 }
7145
7146 return SDValue();
7147}
7148
7149SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
7150 SDNode *N, DAGCombinerInfo &DCI) const {
7151 SelectionDAG &DAG = DCI.DAG;
7152
7153 if (!Subtarget.hasVector())
7154 return SDValue();
7155
7156 // Look through bitcasts that retain the number of vector elements.
7157 SDValue Op = N->getOperand(0);
7158 if (Op.getOpcode() == ISD::BITCAST &&
7159 Op.getValueType().isVector() &&
7160 Op.getOperand(0).getValueType().isVector() &&
7161 Op.getValueType().getVectorNumElements() ==
7162 Op.getOperand(0).getValueType().getVectorNumElements())
7163 Op = Op.getOperand(0);
7164
7165 // Pull BSWAP out of a vector extraction.
7166 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
7167 EVT VecVT = Op.getValueType();
7168 EVT EltVT = VecVT.getVectorElementType();
7169 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
7170 Op.getOperand(0), N->getOperand(1));
7171 DCI.AddToWorklist(Op.getNode());
7172 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
7173 if (EltVT != N->getValueType(0)) {
7174 DCI.AddToWorklist(Op.getNode());
7175 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
7176 }
7177 return Op;
7178 }
7179
7180 // Try to simplify a vector extraction.
7181 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
7182 SDValue Op0 = N->getOperand(0);
7183 EVT VecVT = Op0.getValueType();
7184 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
7185 IndexN->getZExtValue(), DCI, false);
7186 }
7187 return SDValue();
7188}
7189
7190SDValue SystemZTargetLowering::combineJOIN_DWORDS(
7191 SDNode *N, DAGCombinerInfo &DCI) const {
7192 SelectionDAG &DAG = DCI.DAG;
7193 // (join_dwords X, X) == (replicate X)
7194 if (N->getOperand(0) == N->getOperand(1))
7195 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
7196 N->getOperand(0));
7197 return SDValue();
7198}
7199
7200static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
7201 SDValue Chain1 = N1->getOperand(0);
7202 SDValue Chain2 = N2->getOperand(0);
7203
7204 // Trivial case: both nodes take the same chain.
7205 if (Chain1 == Chain2)
7206 return Chain1;
7207
7208 // FIXME - we could handle more complex cases via TokenFactor,
7209 // assuming we can verify that this would not create a cycle.
7210 return SDValue();
7211}
7212
7213SDValue SystemZTargetLowering::combineFP_ROUND(
7214 SDNode *N, DAGCombinerInfo &DCI) const {
7215
7216 if (!Subtarget.hasVector())
7217 return SDValue();
7218
7219 // (fpround (extract_vector_elt X 0))
7220 // (fpround (extract_vector_elt X 1)) ->
7221 // (extract_vector_elt (VROUND X) 0)
7222 // (extract_vector_elt (VROUND X) 2)
7223 //
7224 // This is a special case since the target doesn't really support v2f32s.
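// Note (editorial): the second extract reads element 2 because VROUND packs
// the two rounded f32 results into the even elements (0 and 2) of the
// v4f32 result.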
7225 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7226 SelectionDAG &DAG = DCI.DAG;
7227 SDValue Op0 = N->getOperand(OpNo);
7228 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
7229 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7230 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
7231 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7232 Op0.getConstantOperandVal(1) == 0) {
7233 SDValue Vec = Op0.getOperand(0);
7234 for (auto *U : Vec->uses()) {
7235 if (U != Op0.getNode() && U->hasOneUse() &&
7236 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7237 U->getOperand(0) == Vec &&
7238 U->getOperand(1).getOpcode() == ISD::Constant &&
7239 U->getConstantOperandVal(1) == 1) {
7240 SDValue OtherRound = SDValue(*U->use_begin(), 0);
7241 if (OtherRound.getOpcode() == N->getOpcode() &&
7242 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
7243 OtherRound.getValueType() == MVT::f32) {
7244 SDValue VRound, Chain;
7245 if (N->isStrictFPOpcode()) {
7246 Chain = MergeInputChains(N, OtherRound.getNode());
7247 if (!Chain)
7248 continue;
7249 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
7250 {MVT::v4f32, MVT::Other}, {Chain, Vec});
7251 Chain = VRound.getValue(1);
7252 } else
7253 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
7254 MVT::v4f32, Vec);
7255 DCI.AddToWorklist(VRound.getNode());
7256 SDValue Extract1 =
7257 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
7258 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
7259 DCI.AddToWorklist(Extract1.getNode());
7260 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
7261 if (Chain)
7262 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
7263 SDValue Extract0 =
7264 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
7265 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7266 if (Chain)
7267 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7268 N->getVTList(), Extract0, Chain);
7269 return Extract0;
7270 }
7271 }
7272 }
7273 }
7274 return SDValue();
7275}
7276
7277SDValue SystemZTargetLowering::combineFP_EXTEND(
7278 SDNode *N, DAGCombinerInfo &DCI) const {
7279
7280 if (!Subtarget.hasVector())
7281 return SDValue();
7282
7283 // (fpextend (extract_vector_elt X 0))
7284 // (fpextend (extract_vector_elt X 2)) ->
7285 // (extract_vector_elt (VEXTEND X) 0)
7286 // (extract_vector_elt (VEXTEND X) 1)
7287 //
7288 // This is a special case since the target doesn't really support v2f32s.
7289 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7290 SelectionDAG &DAG = DCI.DAG;
7291 SDValue Op0 = N->getOperand(OpNo);
7292 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
7293 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7294 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
7295 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7296 Op0.getConstantOperandVal(1) == 0) {
7297 SDValue Vec = Op0.getOperand(0);
7298 for (auto *U : Vec->uses()) {
7299 if (U != Op0.getNode() && U->hasOneUse() &&
7300 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7301 U->getOperand(0) == Vec &&
7302 U->getOperand(1).getOpcode() == ISD::Constant &&
7303 U->getConstantOperandVal(1) == 2) {
7304 SDValue OtherExtend = SDValue(*U->use_begin(), 0);
7305 if (OtherExtend.getOpcode() == N->getOpcode() &&
7306 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
7307 OtherExtend.getValueType() == MVT::f64) {
7308 SDValue VExtend, Chain;
7309 if (N->isStrictFPOpcode()) {
7310 Chain = MergeInputChains(N, OtherExtend.getNode());
7311 if (!Chain)
7312 continue;
7313 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
7314 {MVT::v2f64, MVT::Other}, {Chain, Vec});
7315 Chain = VExtend.getValue(1);
7316 } else
7317 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
7318 MVT::v2f64, Vec);
7319 DCI.AddToWorklist(VExtend.getNode());
7320 SDValue Extract1 =
7321 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
7322 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
7323 DCI.AddToWorklist(Extract1.getNode());
7324 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
7325 if (Chain)
7326 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
7327 SDValue Extract0 =
7328 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
7329 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7330 if (Chain)
7331 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7332 N->getVTList(), Extract0, Chain);
7333 return Extract0;
7334 }
7335 }
7336 }
7337 }
7338 return SDValue();
7339}
7340
7341SDValue SystemZTargetLowering::combineINT_TO_FP(
7342 SDNode *N, DAGCombinerInfo &DCI) const {
7343 if (DCI.Level != BeforeLegalizeTypes)
7344 return SDValue();
7345 SelectionDAG &DAG = DCI.DAG;
7346 LLVMContext &Ctx = *DAG.getContext();
7347 unsigned Opcode = N->getOpcode();
7348 EVT OutVT = N->getValueType(0);
7349 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
7350 SDValue Op = N->getOperand(0);
7351 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
7352 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
7353
7354 // Insert an extension before type-legalization to avoid scalarization, e.g.:
7355 // v2f64 = uint_to_fp v2i16
7356 // =>
7357 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
7358 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
7359 OutScalarBits <= 64) {
7360 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
7361 EVT ExtVT = EVT::getVectorVT(
7362 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
7363 unsigned ExtOpcode =
7364 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
7365 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
7366 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
7367 }
7368 return SDValue();
7369}
7370
7371SDValue SystemZTargetLowering::combineBSWAP(
7372 SDNode *N, DAGCombinerInfo &DCI) const {
7373 SelectionDAG &DAG = DCI.DAG;
7374 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
7375 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7376 N->getOperand(0).hasOneUse() &&
7377 canLoadStoreByteSwapped(N->getValueType(0))) {
7378 SDValue Load = N->getOperand(0);
7379 LoadSDNode *LD = cast<LoadSDNode>(Load);
7380
7381 // Create the byte-swapping load.
7382 SDValue Ops[] = {
7383 LD->getChain(), // Chain
7384 LD->getBasePtr() // Ptr
7385 };
7386 EVT LoadVT = N->getValueType(0);
7387 if (LoadVT == MVT::i16)
7388 LoadVT = MVT::i32;
7389 SDValue BSLoad =
7390 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
7391 DAG.getVTList(LoadVT, MVT::Other),
7392 Ops, LD->getMemoryVT(), LD->getMemOperand());
7393
7394 // If this is an i16 load, insert the truncate.
7395 SDValue ResVal = BSLoad;
7396 if (N->getValueType(0) == MVT::i16)
7397 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
7398
7399 // First, combine the bswap away. This makes the value produced by the
7400 // load dead.
7401 DCI.CombineTo(N, ResVal);
7402
7403 // Next, combine the load away, we give it a bogus result value but a real
7404 // chain result. The result value is dead because the bswap is dead.
7405 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
7406
7407 // Return N so it doesn't get rechecked!
7408 return SDValue(N, 0);
7409 }
7410
7411 // Look through bitcasts that retain the number of vector elements.
7412 SDValue Op = N->getOperand(0);
7413 if (Op.getOpcode() == ISD::BITCAST &&
7414 Op.getValueType().isVector() &&
7415 Op.getOperand(0).getValueType().isVector() &&
7416 Op.getValueType().getVectorNumElements() ==
7417 Op.getOperand(0).getValueType().getVectorNumElements())
7418 Op = Op.getOperand(0);
7419
7420 // Push BSWAP into a vector insertion if at least one side then simplifies.
7421 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
7422 SDValue Vec = Op.getOperand(0);
7423 SDValue Elt = Op.getOperand(1);
7424 SDValue Idx = Op.getOperand(2);
7425
7426 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
7427 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
7428 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
7429 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
7430 (canLoadStoreByteSwapped(N->getValueType(0)) &&
7431 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
7432 EVT VecVT = N->getValueType(0);
7433 EVT EltVT = N->getValueType(0).getVectorElementType();
7434 if (VecVT != Vec.getValueType()) {
7435 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
7436 DCI.AddToWorklist(Vec.getNode());
7437 }
7438 if (EltVT != Elt.getValueType()) {
7439 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
7440 DCI.AddToWorklist(Elt.getNode());
7441 }
7442 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
7443 DCI.AddToWorklist(Vec.getNode());
7444 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
7445 DCI.AddToWorklist(Elt.getNode());
7446 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
7447 Vec, Elt, Idx);
7448 }
7449 }
7450
7451 // Push BSWAP into a vector shuffle if at least one side then simplifies.
7452 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
7453 if (SV && Op.hasOneUse()) {
7454 SDValue Op0 = Op.getOperand(0);
7455 SDValue Op1 = Op.getOperand(1);
7456
7457 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
7458 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
7459 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
7460 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
7461 EVT VecVT = N->getValueType(0);
7462 if (VecVT != Op0.getValueType()) {
7463 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
7464 DCI.AddToWorklist(Op0.getNode());
7465 }
7466 if (VecVT != Op1.getValueType()) {
7467 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
7468 DCI.AddToWorklist(Op1.getNode());
7469 }
7470 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
7471 DCI.AddToWorklist(Op0.getNode());
7472 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
7473 DCI.AddToWorklist(Op1.getNode());
7474 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
7475 }
7476 }
7477
7478 return SDValue();
7479}
7480
7481static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
7482 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
7483 // set by the CCReg instruction using the CCValid / CCMask masks.
7484 // If the CCReg instruction is itself an ICMP testing the condition
7485 // code set by some other instruction, see whether we can directly
7486 // use that condition code.
7487
7488 // Verify that we have an ICMP against some constant.
7489 if (CCValid != SystemZ::CCMASK_ICMP)
7490 return false;
7491 auto *ICmp = CCReg.getNode();
7492 if (ICmp->getOpcode() != SystemZISD::ICMP)
7493 return false;
7494 auto *CompareLHS = ICmp->getOperand(0).getNode();
7495 auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
7496 if (!CompareRHS)
7497 return false;
7498
7499 // Optimize the case where CompareLHS is a SELECT_CCMASK.
7500 if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
7501 // Verify that we have an appropriate mask for an EQ or NE comparison.
7502 bool Invert = false;
7503 if (CCMask == SystemZ::CCMASK_CMP_NE)
7504 Invert = !Invert;
7505 else if (CCMask != SystemZ::CCMASK_CMP_EQ)
7506 return false;
7507
7508 // Verify that the ICMP compares against one of the select values.
7509 auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
7510 if (!TrueVal)
7511 return false;
7512 auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7513 if (!FalseVal)
7514 return false;
7515 if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
7516 Invert = !Invert;
7517 else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
7518 return false;
7519
7520 // Compute the effective CC mask for the new branch or select.
7521 auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
7522 auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
7523 if (!NewCCValid || !NewCCMask)
7524 return false;
7525 CCValid = NewCCValid->getZExtValue();
7526 CCMask = NewCCMask->getZExtValue();
7527 if (Invert)
7528 CCMask ^= CCValid;
7529
7530 // Return the updated CCReg link.
7531 CCReg = CompareLHS->getOperand(4);
7532 return true;
7533 }
7534
7535 // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
7536 if (CompareLHS->getOpcode() == ISD::SRA) {
7537 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7538 if (!SRACount || SRACount->getZExtValue() != 30)
7539 return false;
7540 auto *SHL = CompareLHS->getOperand(0).getNode();
7541 if (SHL->getOpcode() != ISD::SHL)
7542 return false;
7543 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
7544 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
7545 return false;
7546 auto *IPM = SHL->getOperand(0).getNode();
7547 if (IPM->getOpcode() != SystemZISD::IPM)
7548 return false;
7549
7550 // Avoid introducing CC spills (because SRA would clobber CC).
7551 if (!CompareLHS->hasOneUse())
7552 return false;
7553 // Verify that the ICMP compares against zero.
7554 if (CompareRHS->getZExtValue() != 0)
7555 return false;
7556
7557 // Compute the effective CC mask for the new branch or select.
7558 CCMask = SystemZ::reverseCCMask(CCMask);
7559
7560 // Return the updated CCReg link.
7561 CCReg = IPM->getOperand(0);
7562 return true;
7563 }
7564
7565 return false;
7566}
7567
7568SDValue SystemZTargetLowering::combineBR_CCMASK(
7569 SDNode *N, DAGCombinerInfo &DCI) const {
7570 SelectionDAG &DAG = DCI.DAG;
7571
7572 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
7573 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7574 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7575 if (!CCValid || !CCMask)
7576 return SDValue();
7577
7578 int CCValidVal = CCValid->getZExtValue();
7579 int CCMaskVal = CCMask->getZExtValue();
7580 SDValue Chain = N->getOperand(0);
7581 SDValue CCReg = N->getOperand(4);
7582
7583 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7584 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
7585 Chain,
7586 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7587 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7588 N->getOperand(3), CCReg);
7589 return SDValue();
7590}
7591
7592SDValue SystemZTargetLowering::combineSELECT_CCMASK(
7593 SDNode *N, DAGCombinerInfo &DCI) const {
7594 SelectionDAG &DAG = DCI.DAG;
7595
7596 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
7597 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
7598 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
7599 if (!CCValid || !CCMask)
7600 return SDValue();
7601
7602 int CCValidVal = CCValid->getZExtValue();
7603 int CCMaskVal = CCMask->getZExtValue();
7604 SDValue CCReg = N->getOperand(4);
7605
7606 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7607 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
7608 N->getOperand(0), N->getOperand(1),
7609 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7610 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7611 CCReg);
7612 return SDValue();
7613}
7614
7615
7616SDValue SystemZTargetLowering::combineGET_CCMASK(
7617 SDNode *N, DAGCombinerInfo &DCI) const {
7618
7619 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
7620 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7621 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7622 if (!CCValid || !CCMask)
7623 return SDValue();
7624 int CCValidVal = CCValid->getZExtValue();
7625 int CCMaskVal = CCMask->getZExtValue();
7626
7627 SDValue Select = N->getOperand(0);
7628 if (Select->getOpcode() == ISD::TRUNCATE)
7629 Select = Select->getOperand(0);
7630 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
7631 return SDValue();
7632
7633 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
7634 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
7635 if (!SelectCCValid || !SelectCCMask)
7636 return SDValue();
7637 int SelectCCValidVal = SelectCCValid->getZExtValue();
7638 int SelectCCMaskVal = SelectCCMask->getZExtValue();
7639
7640 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
7641 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
7642 if (!TrueVal || !FalseVal)
7643 return SDValue();
7644 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
7645 ;
7646 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
7647 SelectCCMaskVal ^= SelectCCValidVal;
7648 else
7649 return SDValue();
7650
7651 if (SelectCCValidVal & ~CCValidVal)
7652 return SDValue();
7653 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
7654 return SDValue();
7655
7656 return Select->getOperand(4);
7657}
7658
7659SDValue SystemZTargetLowering::combineIntDIVREM(
7660 SDNode *N, DAGCombinerInfo &DCI) const {
7661 SelectionDAG &DAG = DCI.DAG;
7662 EVT VT = N->getValueType(0);
7663 // In the case where the divisor is a vector of constants a cheaper
7664 // sequence of instructions can replace the divide. BuildSDIV is called to
7665 // do this during DAG combining, but it only succeeds when it can build a
7666 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
7667 // since it is not Legal but Custom it can only happen before
7668 // legalization. Therefore we must scalarize this early, before the first
7669 // DAG combine ("Combine 1"). For widened vectors, this is already the result of type legalization.
7670 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
7671 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
7672 return DAG.UnrollVectorOp(N);
7673 return SDValue();
7674}
7675
7676SDValue SystemZTargetLowering::combineINTRINSIC(
7677 SDNode *N, DAGCombinerInfo &DCI) const {
7678 SelectionDAG &DAG = DCI.DAG;
7679
7680 unsigned Id = N->getConstantOperandVal(1);
7681 switch (Id) {
7682 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
7683 // or larger is simply a vector load.
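// For example (illustrative): s390_vll with a constant length operand of 15
// or more covers all 16 bytes, so it can be replaced by a plain 16-byte
// vector load of the same address.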
7684 case Intrinsic::s390_vll:
7685 case Intrinsic::s390_vlrl:
7686 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
7687 if (C->getZExtValue() >= 15)
7688 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
7689 N->getOperand(3), MachinePointerInfo());
7690 break;
7691 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
7692 case Intrinsic::s390_vstl:
7693 case Intrinsic::s390_vstrl:
7694 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
7695 if (C->getZExtValue() >= 15)
7696 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
7697 N->getOperand(4), MachinePointerInfo());
7698 break;
7699 }
7700
7701 return SDValue();
7702}
7703
7704SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
7705 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
7706 return N->getOperand(0);
7707 return N;
7708}
7709
7710SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
7711 DAGCombinerInfo &DCI) const {
7712 switch(N->getOpcode()) {
7713 default: break;
7714 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
7715 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
7716 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
7717 case SystemZISD::MERGE_HIGH:
7718 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
7719 case ISD::LOAD: return combineLOAD(N, DCI);
7720 case ISD::STORE: return combineSTORE(N, DCI);
7721 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
7722 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
7723 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
7724 case ISD::STRICT_FP_ROUND:
7725 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
7726 case ISD::STRICT_FP_EXTEND:
7727 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
7728 case ISD::SINT_TO_FP:
7729 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
7730 case ISD::BSWAP: return combineBSWAP(N, DCI);
7731 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
7732 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
7733 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
7734 case ISD::SDIV:
7735 case ISD::UDIV:
7736 case ISD::SREM:
7737 case ISD::UREM: return combineIntDIVREM(N, DCI);
7738 case ISD::INTRINSIC_W_CHAIN:
7739 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
7740 }
7741
7742 return SDValue();
7743}
7744
7745// Return the demanded elements for the OpNo source operand of Op. DemandedElts
7746// are for Op.
7747static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
7748 unsigned OpNo) {
7749 EVT VT = Op.getValueType();
7750 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
7751 APInt SrcDemE;
7752 unsigned Opcode = Op.getOpcode();
7753 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7754 unsigned Id = Op.getConstantOperandVal(0);
7755 switch (Id) {
7756 case Intrinsic::s390_vpksh: // PACKS
7757 case Intrinsic::s390_vpksf:
7758 case Intrinsic::s390_vpksg:
7759 case Intrinsic::s390_vpkshs: // PACKS_CC
7760 case Intrinsic::s390_vpksfs:
7761 case Intrinsic::s390_vpksgs:
7762 case Intrinsic::s390_vpklsh: // PACKLS
7763 case Intrinsic::s390_vpklsf:
7764 case Intrinsic::s390_vpklsg:
7765 case Intrinsic::s390_vpklshs: // PACKLS_CC
7766 case Intrinsic::s390_vpklsfs:
7767 case Intrinsic::s390_vpklsgs:
7768 // VECTOR PACK truncates the elements of two source vectors into one.
7769 SrcDemE = DemandedElts;
7770 if (OpNo == 2)
7771 SrcDemE.lshrInPlace(NumElts / 2);
7772 SrcDemE = SrcDemE.trunc(NumElts / 2);
7773 break;
7774 // VECTOR UNPACK extends half the elements of the source vector.
7775 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7776 case Intrinsic::s390_vuphh:
7777 case Intrinsic::s390_vuphf:
7778 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7779 case Intrinsic::s390_vuplhh:
7780 case Intrinsic::s390_vuplhf:
7781 SrcDemE = APInt(NumElts * 2, 0);
7782 SrcDemE.insertBits(DemandedElts, 0);
7783 break;
7784 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7785 case Intrinsic::s390_vuplhw:
7786 case Intrinsic::s390_vuplf:
7787 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7788 case Intrinsic::s390_vupllh:
7789 case Intrinsic::s390_vupllf:
7790 SrcDemE = APInt(NumElts * 2, 0);
7791 SrcDemE.insertBits(DemandedElts, NumElts);
7792 break;
7793 case Intrinsic::s390_vpdi: {
7794 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
7795 SrcDemE = APInt(NumElts, 0);
7796 if (!DemandedElts[OpNo - 1])
7797 break;
7798 unsigned Mask = Op.getConstantOperandVal(3);
7799 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
7800 // Demand input element 0 or 1, given by the mask bit value.
7801 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
7802 break;
7803 }
7804 case Intrinsic::s390_vsldb: {
7805 // VECTOR SHIFT LEFT DOUBLE BY BYTE
7806 assert(VT == MVT::v16i8 && "Unexpected type.");
7807 unsigned FirstIdx = Op.getConstantOperandVal(3);
7808 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
7809 unsigned NumSrc0Els = 16 - FirstIdx;
7810 SrcDemE = APInt(NumElts, 0);
7811 if (OpNo == 1) {
7812 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
7813 SrcDemE.insertBits(DemEls, FirstIdx);
7814 } else {
7815 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
7816 SrcDemE.insertBits(DemEls, 0);
7817 }
7818 break;
7819 }
7820 case Intrinsic::s390_vperm:
7821 SrcDemE = APInt(NumElts, -1);
7822 break;
7823 default:
7824 llvm_unreachable("Unhandled intrinsic.");
7825 break;
7826 }
7827 } else {
7828 switch (Opcode) {
7829 case SystemZISD::JOIN_DWORDS:
7830 // Scalar operand.
7831 SrcDemE = APInt(1, 1);
7832 break;
7833 case SystemZISD::SELECT_CCMASK:
7834 SrcDemE = DemandedElts;
7835 break;
7836 default:
7837 llvm_unreachable("Unhandled opcode.");
7838 break;
7839 }
7840 }
7841 return SrcDemE;
7842}
7843
7844static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
7845 const APInt &DemandedElts,
7846 const SelectionDAG &DAG, unsigned Depth,
7847 unsigned OpNo) {
7848 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
7849 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
7850 KnownBits LHSKnown =
7851 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
7852 KnownBits RHSKnown =
7853 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
7854 Known = LHSKnown.intersectWith(RHSKnown);
7855}
7856
7857void
7858SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
7859 KnownBits &Known,
7860 const APInt &DemandedElts,
7861 const SelectionDAG &DAG,
7862 unsigned Depth) const {
7863 Known.resetAll();
7864
7865 // Intrinsic CC result is returned in the two low bits.
7866 unsigned tmp0, tmp1; // not used
7867 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
7868 Known.Zero.setBitsFrom(2);
7869 return;
7870 }
7871 EVT VT = Op.getValueType();
7872 if (Op.getResNo() != 0 || VT == MVT::Untyped)
7873 return;
7874 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
7875 "KnownBits does not match VT in bitwidth");
7876 assert ((!VT.isVector() ||
7877 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
7878 "DemandedElts does not match VT number of elements");
7879 unsigned BitWidth = Known.getBitWidth();
7880 unsigned Opcode = Op.getOpcode();
7881 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7882 bool IsLogical = false;
7883 unsigned Id = Op.getConstantOperandVal(0);
7884 switch (Id) {
7885 case Intrinsic::s390_vpksh: // PACKS
7886 case Intrinsic::s390_vpksf:
7887 case Intrinsic::s390_vpksg:
7888 case Intrinsic::s390_vpkshs: // PACKS_CC
7889 case Intrinsic::s390_vpksfs:
7890 case Intrinsic::s390_vpksgs:
7891 case Intrinsic::s390_vpklsh: // PACKLS
7892 case Intrinsic::s390_vpklsf:
7893 case Intrinsic::s390_vpklsg:
7894 case Intrinsic::s390_vpklshs: // PACKLS_CC
7895 case Intrinsic::s390_vpklsfs:
7896 case Intrinsic::s390_vpklsgs:
7897 case Intrinsic::s390_vpdi:
7898 case Intrinsic::s390_vsldb:
7899 case Intrinsic::s390_vperm:
7900 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
7901 break;
7902 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7903 case Intrinsic::s390_vuplhh:
7904 case Intrinsic::s390_vuplhf:
7905 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7906 case Intrinsic::s390_vupllh:
7907 case Intrinsic::s390_vupllf:
7908 IsLogical = true;
7909 [[fallthrough]];
7910 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7911 case Intrinsic::s390_vuphh:
7912 case Intrinsic::s390_vuphf:
7913 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7914 case Intrinsic::s390_vuplhw:
7915 case Intrinsic::s390_vuplf: {
7916 SDValue SrcOp = Op.getOperand(1);
7917 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
7918 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
7919 if (IsLogical) {
7920 Known = Known.zext(BitWidth);
7921 } else
7922 Known = Known.sext(BitWidth);
7923 break;
7924 }
7925 default:
7926 break;
7927 }
7928 } else {
7929 switch (Opcode) {
7930 case SystemZISD::JOIN_DWORDS:
7931 case SystemZISD::SELECT_CCMASK:
7932 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
7933 break;
7934 case SystemZISD::REPLICATE: {
7935 SDValue SrcOp = Op.getOperand(0);
7936 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
7937 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
7938 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
7939 break;
7940 }
7941 default:
7942 break;
7943 }
7944 }
7945
7946 // Known has the width of the source operand(s). Adjust if needed to match
7947 // the passed bitwidth.
7948 if (Known.getBitWidth() != BitWidth)
7949 Known = Known.anyextOrTrunc(BitWidth);
7950}
7951
7952static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
7953 const SelectionDAG &DAG, unsigned Depth,
7954 unsigned OpNo) {
7955 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
7956 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
7957 if (LHS == 1) return 1; // Early out.
7958 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
7959 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
7960 if (RHS == 1) return 1; // Early out.
7961 unsigned Common = std::min(LHS, RHS);
7962 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
7963 EVT VT = Op.getValueType();
7964 unsigned VTBits = VT.getScalarSizeInBits();
7965 if (SrcBitWidth > VTBits) { // PACK
7966 unsigned SrcExtraBits = SrcBitWidth - VTBits;
7967 if (Common > SrcExtraBits)
7968 return (Common - SrcExtraBits);
7969 return 1;
7970 }
7971 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
7972 return Common;
7973}
7974
7975unsigned
7976SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
7977 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
7978 unsigned Depth) const {
7979 if (Op.getResNo() != 0)
7980 return 1;
7981 unsigned Opcode = Op.getOpcode();
7982 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7983 unsigned Id = Op.getConstantOperandVal(0);
7984 switch (Id) {
7985 case Intrinsic::s390_vpksh: // PACKS
7986 case Intrinsic::s390_vpksf:
7987 case Intrinsic::s390_vpksg:
7988 case Intrinsic::s390_vpkshs: // PACKS_CC
7989 case Intrinsic::s390_vpksfs:
7990 case Intrinsic::s390_vpksgs:
7991 case Intrinsic::s390_vpklsh: // PACKLS
7992 case Intrinsic::s390_vpklsf:
7993 case Intrinsic::s390_vpklsg:
7994 case Intrinsic::s390_vpklshs: // PACKLS_CC
7995 case Intrinsic::s390_vpklsfs:
7996 case Intrinsic::s390_vpklsgs:
7997 case Intrinsic::s390_vpdi:
7998 case Intrinsic::s390_vsldb:
7999 case Intrinsic::s390_vperm:
8000 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
8001 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8002 case Intrinsic::s390_vuphh:
8003 case Intrinsic::s390_vuphf:
8004 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8005 case Intrinsic::s390_vuplhw:
8006 case Intrinsic::s390_vuplf: {
8007 SDValue PackedOp = Op.getOperand(1);
8008 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
8009 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
8010 EVT VT = Op.getValueType();
8011 unsigned VTBits = VT.getScalarSizeInBits();
8012 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
8013 return Tmp;
8014 }
8015 default:
8016 break;
8017 }
8018 } else {
8019 switch (Opcode) {
8020 case SystemZISD::SELECT_CCMASK:
8021 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
8022 default:
8023 break;
8024 }
8025 }
8026
8027 return 1;
8028}
8029
8030bool SystemZTargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
8031 SDValue Op,
8032 const APInt &DemandedElts, const SelectionDAG &DAG,
8033 bool PoisonOnly, unsigned Depth) const {
8034 switch (Op->getOpcode()) {
8035 case SystemZISD::PCREL_WRAPPER:
8036 case SystemZISD::PCREL_OFFSET:
8037 return true;
8038 }
8039 return false;
8040}
8041
8042unsigned
8043SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
8044 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
8045 unsigned StackAlign = TFI->getStackAlignment();
8046 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
8047 "Unexpected stack alignment");
8048 // The default stack probe size is 4096 if the function has no
8049 // stack-probe-size attribute.
8050 unsigned StackProbeSize =
8051 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
8052 // Round down to the stack alignment.
8053 StackProbeSize &= ~(StackAlign - 1);
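// Illustrative arithmetic: with "stack-probe-size"=1001 and an 8-byte stack
// alignment, this rounds down to 1001 & ~7 = 1000.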
8054 return StackProbeSize ? StackProbeSize : StackAlign;
8055}
8056
8057//===----------------------------------------------------------------------===//
8058// Custom insertion
8059//===----------------------------------------------------------------------===//
8060
8061// Force base value Base into a register before MI. Return the register.
8062static Register forceReg(MachineInstr &MI, MachineOperand &Base,
8063 const SystemZInstrInfo *TII) {
8064 MachineBasicBlock *MBB = MI.getParent();
8065 MachineFunction &MF = *MBB->getParent();
8066 MachineRegisterInfo &MRI = MF.getRegInfo();
8067
8068 if (Base.isReg()) {
8069 // Copy Base into a new virtual register to help register coalescing in
8070 // cases with multiple uses.
8071 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8072 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
8073 .add(Base);
8074 return Reg;
8075 }
8076
8077 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8078 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
8079 .add(Base)
8080 .addImm(0)
8081 .addReg(0);
8082 return Reg;
8083}
8084
8085// The CC operand of MI might be missing a kill marker because there
8086// were multiple uses of CC, and ISel didn't know which to mark.
8087// Figure out whether MI should have had a kill marker.
8088static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
8089 // Scan forward through BB for a use/def of CC.
8090 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
8091 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
8092 const MachineInstr& mi = *miI;
8093 if (mi.readsRegister(SystemZ::CC))
8094 return false;
8095 if (mi.definesRegister(SystemZ::CC))
8096 break; // Should have kill-flag - update below.
8097 }
8098
8099 // If we hit the end of the block, check whether CC is live into a
8100 // successor.
8101 if (miI == MBB->end()) {
8102 for (const MachineBasicBlock *Succ : MBB->successors())
8103 if (Succ->isLiveIn(SystemZ::CC))
8104 return false;
8105 }
8106
8107 return true;
8108}
8109
8110// Return true if it is OK for this Select pseudo-opcode to be cascaded
8111// together with other Select pseudo-opcodes into a single basic-block with
8112// a conditional jump around it.
8113static bool isSelectPseudo(MachineInstr &MI) {
8114 switch (MI.getOpcode()) {
8115 case SystemZ::Select32:
8116 case SystemZ::Select64:
8117 case SystemZ::Select128:
8118 case SystemZ::SelectF32:
8119 case SystemZ::SelectF64:
8120 case SystemZ::SelectF128:
8121 case SystemZ::SelectVR32:
8122 case SystemZ::SelectVR64:
8123 case SystemZ::SelectVR128:
8124 return true;
8125
8126 default:
8127 return false;
8128 }
8129}
8130
8131// Helper function, which inserts PHI functions into SinkMBB:
8132// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
8133// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
8134static void createPHIsForSelects(SmallVectorImpl<MachineInstr*> &Selects,
8135 MachineBasicBlock *TrueMBB,
8136 MachineBasicBlock *FalseMBB,
8137 MachineBasicBlock *SinkMBB) {
8138 MachineFunction *MF = TrueMBB->getParent();
8139 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
8140
8141 MachineInstr *FirstMI = Selects.front();
8142 unsigned CCValid = FirstMI->getOperand(3).getImm();
8143 unsigned CCMask = FirstMI->getOperand(4).getImm();
8144
8145 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
8146
8147 // As we are creating the PHIs, we have to be careful if there is more than
8148 // one. Later Selects may reference the results of earlier Selects, but later
8149 // PHIs have to reference the individual true/false inputs from earlier PHIs.
8150 // That also means that PHI construction must work forward from earlier to
8151 // later, and that the code must maintain a mapping from earlier PHI's
8152 // destination registers, and the registers that went into the PHI.
8153 DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
8154
8155 for (auto *MI : Selects) {
8156 Register DestReg = MI->getOperand(0).getReg();
8157 Register TrueReg = MI->getOperand(1).getReg();
8158 Register FalseReg = MI->getOperand(2).getReg();
8159
8160 // If this Select we are generating is the opposite condition from
8161 // the jump we generated, then we have to swap the operands for the
8162 // PHI that is going to be generated.
8163 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
8164 std::swap(TrueReg, FalseReg);
8165
8166 if (RegRewriteTable.contains(TrueReg))
8167 TrueReg = RegRewriteTable[TrueReg].first;
8168
8169 if (RegRewriteTable.contains(FalseReg))
8170 FalseReg = RegRewriteTable[FalseReg].second;
8171
8172 DebugLoc DL = MI->getDebugLoc();
8173 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
8174 .addReg(TrueReg).addMBB(TrueMBB)
8175 .addReg(FalseReg).addMBB(FalseMBB);
8176
8177 // Add this PHI to the rewrite table.
8178 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
8179 }
8180
8181 MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
8182}
8183
8184MachineBasicBlock *
8185SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
8186 MachineBasicBlock *BB) const {
8187 MachineFunction &MF = *BB->getParent();
8188 MachineFrameInfo &MFI = MF.getFrameInfo();
8189 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
8190 assert(TFL->hasReservedCallFrame(MF) &&
8191 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
8192 (void)TFL;
8193 // Get the MaxCallFrameSize value and erase MI since it serves no further
8194 // purpose as the call frame is statically reserved in the prolog. Set
8195 // AdjustsStack as MI is *not* mapped as a frame instruction.
8196 uint32_t NumBytes = MI.getOperand(0).getImm();
8197 if (NumBytes > MFI.getMaxCallFrameSize())
8198 MFI.setMaxCallFrameSize(NumBytes);
8199 MFI.setAdjustsStack(true);
8200
8201 MI.eraseFromParent();
8202 return BB;
8203}
8204
8205// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
8206MachineBasicBlock *
8207SystemZTargetLowering::emitSelect(MachineInstr &MI,
8208 MachineBasicBlock *MBB) const {
8209 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
8210 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8211
8212 unsigned CCValid = MI.getOperand(3).getImm();
8213 unsigned CCMask = MI.getOperand(4).getImm();
8214
8215 // If we have a sequence of Select* pseudo instructions using the
8216 // same condition code value, we want to expand all of them into
8217 // a single pair of basic blocks using the same condition.
8218 SmallVector<MachineInstr*, 8> Selects;
8219 SmallVector<MachineInstr*, 8> DbgValues;
8220 Selects.push_back(&MI);
8221 unsigned Count = 0;
8222 for (MachineInstr &NextMI : llvm::make_range(
8223 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
8224 if (isSelectPseudo(NextMI)) {
8225 assert(NextMI.getOperand(3).getImm() == CCValid &&
8226 "Bad CCValid operands since CC was not redefined.");
8227 if (NextMI.getOperand(4).getImm() == CCMask ||
8228 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
8229 Selects.push_back(&NextMI);
8230 continue;
8231 }
8232 break;
8233 }
8234 if (NextMI.definesRegister(SystemZ::CC) || NextMI.usesCustomInsertionHook())
8235 break;
8236 bool User = false;
8237 for (auto *SelMI : Selects)
8238 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
8239 User = true;
8240 break;
8241 }
8242 if (NextMI.isDebugInstr()) {
8243 if (User) {
8244 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
8245 DbgValues.push_back(&NextMI);
8246 }
8247 } else if (User || ++Count > 20)
8248 break;
8249 }
8250
8251 MachineInstr *LastMI = Selects.back();
8252 bool CCKilled =
8253 (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB));
8254 MachineBasicBlock *StartMBB = MBB;
8255 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
8256 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8257
8258 // Unless CC was killed in the last Select instruction, mark it as
8259 // live-in to both FalseMBB and JoinMBB.
8260 if (!CCKilled) {
8261 FalseMBB->addLiveIn(SystemZ::CC);
8262 JoinMBB->addLiveIn(SystemZ::CC);
8263 }
8264
8265 // StartMBB:
8266 // BRC CCMask, JoinMBB
8267 // # fallthrough to FalseMBB
8268 MBB = StartMBB;
8269 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8270 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8271 MBB->addSuccessor(JoinMBB);
8272 MBB->addSuccessor(FalseMBB);
8273
8274 // FalseMBB:
8275 // # fallthrough to JoinMBB
8276 MBB = FalseMBB;
8277 MBB->addSuccessor(JoinMBB);
8278
8279 // JoinMBB:
8280 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
8281 // ...
8282 MBB = JoinMBB;
8283 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
8284 for (auto *SelMI : Selects)
8285 SelMI->eraseFromParent();
8286
8287 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
8288 for (auto *DbgMI : DbgValues)
8289 MBB->splice(InsertPos, StartMBB, DbgMI);
8290
8291 return JoinMBB;
8292}
8293
8294// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
8295// StoreOpcode is the store to use and Invert says whether the store should
8296// happen when the condition is false rather than true. If a STORE ON
8297// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
8298MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
8299 MachineBasicBlock *MBB,
8300 unsigned StoreOpcode,
8301 unsigned STOCOpcode,
8302 bool Invert) const {
8303 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8304
8305 Register SrcReg = MI.getOperand(0).getReg();
8306 MachineOperand Base = MI.getOperand(1);
8307 int64_t Disp = MI.getOperand(2).getImm();
8308 Register IndexReg = MI.getOperand(3).getReg();
8309 unsigned CCValid = MI.getOperand(4).getImm();
8310 unsigned CCMask = MI.getOperand(5).getImm();
8311 DebugLoc DL = MI.getDebugLoc();
8312
8313 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
8314
8315 // ISel pattern matching also adds a load memory operand of the same
8316 // address, so take special care to find the storing memory operand.
8317 MachineMemOperand *MMO = nullptr;
8318 for (auto *I : MI.memoperands())
8319 if (I->isStore()) {
8320 MMO = I;
8321 break;
8322 }
8323
8324 // Use STOCOpcode if possible. We could use different store patterns in
8325 // order to avoid matching the index register, but the performance trade-offs
8326 // might be more complicated in that case.
8327 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
8328 if (Invert)
8329 CCMask ^= CCValid;
8330
8331 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
8332 .addReg(SrcReg)
8333 .add(Base)
8334 .addImm(Disp)
8335 .addImm(CCValid)
8336 .addImm(CCMask)
8337 .addMemOperand(MMO);
8338
8339 MI.eraseFromParent();
8340 return MBB;
8341 }
8342
8343 // Get the condition needed to branch around the store.
8344 if (!Invert)
8345 CCMask ^= CCValid;
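  // For illustration (assuming the usual SystemZ BRC mask encoding, where
  // CC0 = 8, CC1 = 4, CC2 = 2 and CC3 = 1): a CondStore guarded by "equal"
  // has CCValid == 14 (CC0|CC1|CC2) and CCMask == 8, so the branch-around
  // mask becomes 8 ^ 14 == 6 (CC1|CC2, i.e. "not equal") and the store in
  // FalseMBB executes only when the comparison was equal.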
8346
8347 MachineBasicBlock *StartMBB = MBB;
8349 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8350
8351 // Unless CC was killed in the CondStore instruction, mark it as
8352 // live-in to both FalseMBB and JoinMBB.
8353 if (!MI.killsRegister(SystemZ::CC) && !checkCCKill(MI, JoinMBB)) {
8354 FalseMBB->addLiveIn(SystemZ::CC);
8355 JoinMBB->addLiveIn(SystemZ::CC);
8356 }
8357
8358 // StartMBB:
8359 // BRC CCMask, JoinMBB
8360 // # fallthrough to FalseMBB
8361 MBB = StartMBB;
8362 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8363 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8364 MBB->addSuccessor(JoinMBB);
8365 MBB->addSuccessor(FalseMBB);
8366
8367 // FalseMBB:
8368 // store %SrcReg, %Disp(%Index,%Base)
8369 // # fallthrough to JoinMBB
8370 MBB = FalseMBB;
8371 BuildMI(MBB, DL, TII->get(StoreOpcode))
8372 .addReg(SrcReg)
8373 .add(Base)
8374 .addImm(Disp)
8375 .addReg(IndexReg)
8376 .addMemOperand(MMO);
8377 MBB->addSuccessor(JoinMBB);
8378
8379 MI.eraseFromParent();
8380 return JoinMBB;
8381}
8382
8383// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
8385SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
8387 bool Unsigned) const {
8388 MachineFunction &MF = *MBB->getParent();
8389 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8391
8392 // Synthetic instruction to compare 128-bit values.
8393 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
8394 Register Op0 = MI.getOperand(0).getReg();
8395 Register Op1 = MI.getOperand(1).getReg();
8396
8397 MachineBasicBlock *StartMBB = MBB;
8399 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
8400
8401 // StartMBB:
8402 //
8403 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
8404 // Swap the inputs to get:
8405 // CC 1 if high(Op0) > high(Op1)
8406 // CC 2 if high(Op0) < high(Op1)
8407 // CC 0 if high(Op0) == high(Op1)
8408 //
8409 // If CC != 0, we're done, so jump over the next instruction.
8410 //
8411 // VEC[L]G Op1, Op0
8412 // JNE JoinMBB
8413 // # fallthrough to HiEqMBB
8414 MBB = StartMBB;
8415 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
8416 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
8417 .addReg(Op1).addReg(Op0);
8418 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8420 MBB->addSuccessor(JoinMBB);
8421 MBB->addSuccessor(HiEqMBB);
8422
8423 // HiEqMBB:
8424 //
8425 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
8426 // Since we already know the high parts are equal, the CC
8427 // result will only depend on the low parts:
8428 // CC 1 if low(Op0) > low(Op1)
8429 // CC 3 if low(Op0) <= low(Op1)
8430 //
8431 // VCHLGS Tmp, Op0, Op1
8432 // # fallthrough to JoinMBB
8433 MBB = HiEqMBB;
8434 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
8435 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
8436 .addReg(Op0).addReg(Op1);
8437 MBB->addSuccessor(JoinMBB);
8438
8439 // Mark CC as live-in to JoinMBB.
8440 JoinMBB->addLiveIn(SystemZ::CC);
8441
8442 MI.eraseFromParent();
8443 return JoinMBB;
8444}
8445
8446// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
8447// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
8448// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
8449// whether the field should be inverted after performing BinOpcode (e.g. for
8450// NAND).
8451MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
8452 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
8453 bool Invert) const {
8454 MachineFunction &MF = *MBB->getParent();
8455 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8457
8458 // Extract the operands. Base can be a register or a frame index.
8459 // Src2 can be a register or immediate.
8460 Register Dest = MI.getOperand(0).getReg();
8461 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8462 int64_t Disp = MI.getOperand(2).getImm();
8463 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
8464 Register BitShift = MI.getOperand(4).getReg();
8465 Register NegBitShift = MI.getOperand(5).getReg();
8466 unsigned BitSize = MI.getOperand(6).getImm();
8467 DebugLoc DL = MI.getDebugLoc();
8468
8469 // Get the right opcodes for the displacement.
8470 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8471 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8472 assert(LOpcode && CSOpcode && "Displacement out of range");
8473
8474 // Create virtual registers for temporary results.
8475 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8476 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8477 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8478 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8479 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8480
8481 // Insert a basic block for the main loop.
8482 MachineBasicBlock *StartMBB = MBB;
8484 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8485
8486 // StartMBB:
8487 // ...
8488 // %OrigVal = L Disp(%Base)
8489 // # fall through to LoopMBB
8490 MBB = StartMBB;
8491 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8492 MBB->addSuccessor(LoopMBB);
8493
8494 // LoopMBB:
8495 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
8496 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8497 // %RotatedNewVal = OP %RotatedOldVal, %Src2
8498 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8499 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8500 // JNE LoopMBB
8501 // # fall through to DoneMBB
8502 MBB = LoopMBB;
8503 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8504 .addReg(OrigVal).addMBB(StartMBB)
8505 .addReg(Dest).addMBB(LoopMBB);
8506 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8507 .addReg(OldVal).addReg(BitShift).addImm(0);
8508 if (Invert) {
8509 // Perform the operation normally and then invert every bit of the field.
8510 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8511 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
8512 // XILF with the upper BitSize bits set.
8513 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
8514 .addReg(Tmp).addImm(-1U << (32 - BitSize));
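    // For example, with BitSize == 8 the immediate is -1U << 24 == 0xFF000000,
    // so only the high byte of the rotated word (the field being updated)
    // has its bits flipped.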
8515 } else if (BinOpcode)
8516 // A simple binary operation.
8517 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
8518 .addReg(RotatedOldVal)
8519 .add(Src2);
8520 else
8521 // Use RISBG to rotate Src2 into position and use it to replace the
8522 // field in RotatedOldVal.
8523 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
8524 .addReg(RotatedOldVal).addReg(Src2.getReg())
8525 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
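    // Rough illustration, assuming standard RISBG semantics: for BitSize == 8
    // the operands are I3 = 32, I4 = 39 and a rotate amount of 24, so the low
    // byte of %Src2 is rotated up into bits 32-39 and inserted there,
    // replacing exactly the byte of %RotatedOldVal that holds the field.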
8526 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8527 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8528 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8529 .addReg(OldVal)
8530 .addReg(NewVal)
8531 .add(Base)
8532 .addImm(Disp);
8533 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8535 MBB->addSuccessor(LoopMBB);
8536 MBB->addSuccessor(DoneMBB);
8537
8538 MI.eraseFromParent();
8539 return DoneMBB;
8540}
8541
8542// Implement EmitInstrWithCustomInserter for subword pseudo
8543// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
8544// instruction that should be used to compare the current field with the
8545// minimum or maximum value. KeepOldMask is the BRC condition-code mask
8546// for when the current field should be kept.
8547MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
8548 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
8549 unsigned KeepOldMask) const {
8550 MachineFunction &MF = *MBB->getParent();
8551 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8553
8554 // Extract the operands. Base can be a register or a frame index.
8555 Register Dest = MI.getOperand(0).getReg();
8556 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8557 int64_t Disp = MI.getOperand(2).getImm();
8558 Register Src2 = MI.getOperand(3).getReg();
8559 Register BitShift = MI.getOperand(4).getReg();
8560 Register NegBitShift = MI.getOperand(5).getReg();
8561 unsigned BitSize = MI.getOperand(6).getImm();
8562 DebugLoc DL = MI.getDebugLoc();
8563
8564 // Get the right opcodes for the displacement.
8565 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8566 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8567 assert(LOpcode && CSOpcode && "Displacement out of range");
8568
8569 // Create virtual registers for temporary results.
8570 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8571 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8572 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8573 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8574 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8575 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8576
8577 // Insert 3 basic blocks for the loop.
8578 MachineBasicBlock *StartMBB = MBB;
8580 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8581 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
8582 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
8583
8584 // StartMBB:
8585 // ...
8586 // %OrigVal = L Disp(%Base)
8587 // # fall through to LoopMBB
8588 MBB = StartMBB;
8589 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8590 MBB->addSuccessor(LoopMBB);
8591
8592 // LoopMBB:
8593 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
8594 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8595 // CompareOpcode %RotatedOldVal, %Src2
8596 // BRC KeepOldMask, UpdateMBB
8597 MBB = LoopMBB;
8598 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8599 .addReg(OrigVal).addMBB(StartMBB)
8600 .addReg(Dest).addMBB(UpdateMBB);
8601 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8602 .addReg(OldVal).addReg(BitShift).addImm(0);
8603 BuildMI(MBB, DL, TII->get(CompareOpcode))
8604 .addReg(RotatedOldVal).addReg(Src2);
8605 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8606 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
8607 MBB->addSuccessor(UpdateMBB);
8608 MBB->addSuccessor(UseAltMBB);
8609
8610 // UseAltMBB:
8611 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
8612 // # fall through to UpdateMBB
8613 MBB = UseAltMBB;
8614 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
8615 .addReg(RotatedOldVal).addReg(Src2)
8616 .addImm(32).addImm(31 + BitSize).addImm(0);
8617 MBB->addSuccessor(UpdateMBB);
8618
8619 // UpdateMBB:
8620 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
8621 // [ %RotatedAltVal, UseAltMBB ]
8622 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8623 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8624 // JNE LoopMBB
8625 // # fall through to DoneMBB
8626 MBB = UpdateMBB;
8627 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
8628 .addReg(RotatedOldVal).addMBB(LoopMBB)
8629 .addReg(RotatedAltVal).addMBB(UseAltMBB);
8630 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8631 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8632 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8633 .addReg(OldVal)
8634 .addReg(NewVal)
8635 .add(Base)
8636 .addImm(Disp);
8637 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8639 MBB->addSuccessor(LoopMBB);
8640 MBB->addSuccessor(DoneMBB);
8641
8642 MI.eraseFromParent();
8643 return DoneMBB;
8644}
8645
8646// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
8647// instruction MI.
8649SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
8650 MachineBasicBlock *MBB) const {
8651 MachineFunction &MF = *MBB->getParent();
8652 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8654
8655 // Extract the operands. Base can be a register or a frame index.
8656 Register Dest = MI.getOperand(0).getReg();
8657 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8658 int64_t Disp = MI.getOperand(2).getImm();
8659 Register CmpVal = MI.getOperand(3).getReg();
8660 Register OrigSwapVal = MI.getOperand(4).getReg();
8661 Register BitShift = MI.getOperand(5).getReg();
8662 Register NegBitShift = MI.getOperand(6).getReg();
8663 int64_t BitSize = MI.getOperand(7).getImm();
8664 DebugLoc DL = MI.getDebugLoc();
8665
8666 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
8667
8668 // Get the right opcodes for the displacement and zero-extension.
8669 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8670 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8671 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
8672 assert(LOpcode && CSOpcode && "Displacement out of range");
8673
8674 // Create virtual registers for temporary results.
8675 Register OrigOldVal = MRI.createVirtualRegister(RC);
8676 Register OldVal = MRI.createVirtualRegister(RC);
8677 Register SwapVal = MRI.createVirtualRegister(RC);
8678 Register StoreVal = MRI.createVirtualRegister(RC);
8679 Register OldValRot = MRI.createVirtualRegister(RC);
8680 Register RetryOldVal = MRI.createVirtualRegister(RC);
8681 Register RetrySwapVal = MRI.createVirtualRegister(RC);
8682
8683 // Insert 2 basic blocks for the loop.
8684 MachineBasicBlock *StartMBB = MBB;
8686 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8687 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
8688
8689 // StartMBB:
8690 // ...
8691 // %OrigOldVal = L Disp(%Base)
8692 // # fall through to LoopMBB
8693 MBB = StartMBB;
8694 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
8695 .add(Base)
8696 .addImm(Disp)
8697 .addReg(0);
8698 MBB->addSuccessor(LoopMBB);
8699
8700 // LoopMBB:
8701 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
8702 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
8703 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
8704 // ^^ The low BitSize bits contain the field
8705 // of interest.
8706 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
8707 // ^^ Replace the upper 32-BitSize bits of the
8708 // swap value with those that we loaded and rotated.
8709 // %Dest = LL[CH] %OldValRot
8710 // CR %Dest, %CmpVal
8711 // JNE DoneMBB
8712 // # Fall through to SetMBB
8713 MBB = LoopMBB;
8714 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8715 .addReg(OrigOldVal).addMBB(StartMBB)
8716 .addReg(RetryOldVal).addMBB(SetMBB);
8717 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
8718 .addReg(OrigSwapVal).addMBB(StartMBB)
8719 .addReg(RetrySwapVal).addMBB(SetMBB);
8720 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
8721 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
8722 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
8723 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
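  // E.g. for BitSize == 8 the RISBG selects bits 32-55, so the upper 24 bits
  // of the 32-bit swap value are taken from %OldValRot while the low byte
  // (the replacement byte) is kept from %SwapVal.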
8724 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
8725 .addReg(OldValRot);
8726 BuildMI(MBB, DL, TII->get(SystemZ::CR))
8727 .addReg(Dest).addReg(CmpVal);
8728 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8731 MBB->addSuccessor(DoneMBB);
8732 MBB->addSuccessor(SetMBB);
8733
8734 // SetMBB:
8735 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
8736 // ^^ Rotate the new field to its proper position.
8737 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
8738 // JNE LoopMBB
8739 // # fall through to ExitMBB
8740 MBB = SetMBB;
8741 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
8742 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
8743 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
8744 .addReg(OldVal)
8745 .addReg(StoreVal)
8746 .add(Base)
8747 .addImm(Disp);
8748 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8750 MBB->addSuccessor(LoopMBB);
8751 MBB->addSuccessor(DoneMBB);
8752
8753 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
8754 // to the block after the loop. At this point, CC may have been defined
8755 // either by the CR in LoopMBB or by the CS in SetMBB.
8756 if (!MI.registerDefIsDead(SystemZ::CC))
8757 DoneMBB->addLiveIn(SystemZ::CC);
8758
8759 MI.eraseFromParent();
8760 return DoneMBB;
8761}
8762
8763// Emit a move from two GR64s to a GR128.
8765SystemZTargetLowering::emitPair128(MachineInstr &MI,
8766 MachineBasicBlock *MBB) const {
8767 MachineFunction &MF = *MBB->getParent();
8768 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8770 DebugLoc DL = MI.getDebugLoc();
8771
8772 Register Dest = MI.getOperand(0).getReg();
8773 Register Hi = MI.getOperand(1).getReg();
8774 Register Lo = MI.getOperand(2).getReg();
8775 Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8776 Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8777
8778 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1);
8779 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
8780 .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64);
8781 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8782 .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64);
8783
8784 MI.eraseFromParent();
8785 return MBB;
8786}
8787
8788// Emit an extension from a GR64 to a GR128. ClearEven is true
8789// if the high register of the GR128 value must be cleared or false if
8790// it's "don't care".
8791MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
8793 bool ClearEven) const {
8794 MachineFunction &MF = *MBB->getParent();
8795 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8797 DebugLoc DL = MI.getDebugLoc();
8798
8799 Register Dest = MI.getOperand(0).getReg();
8800 Register Src = MI.getOperand(1).getReg();
8801 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8802
8803 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
8804 if (ClearEven) {
8805 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8806 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8807
8808 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
8809 .addImm(0);
8810 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
8811 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
8812 In128 = NewIn128;
8813 }
8814 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8815 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
8816
8817 MI.eraseFromParent();
8818 return MBB;
8819}
8820
8822SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
8824 unsigned Opcode, bool IsMemset) const {
8825 MachineFunction &MF = *MBB->getParent();
8826 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8828 DebugLoc DL = MI.getDebugLoc();
8829
8830 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
8831 uint64_t DestDisp = MI.getOperand(1).getImm();
8832 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
8833 uint64_t SrcDisp;
8834
8835 // Fold the displacement Disp if it is out of range.
8836 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
8837 if (!isUInt<12>(Disp)) {
8838 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8839 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
8840 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
8841 .add(Base).addImm(Disp).addReg(0);
8842 Base = MachineOperand::CreateReg(Reg, false);
8843 Disp = 0;
8844 }
8845 };
8846
8847 if (!IsMemset) {
8848 SrcBase = earlyUseOperand(MI.getOperand(2));
8849 SrcDisp = MI.getOperand(3).getImm();
8850 } else {
8851 SrcBase = DestBase;
8852 SrcDisp = DestDisp++;
8853 foldDisplIfNeeded(DestBase, DestDisp);
8854 }
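  // The memset case therefore uses the same memory as both source and
  // destination: insertMemMemOp below first stores the fill byte at the
  // original displacement and the overlapping MVC then copies from DestDisp
  // to DestDisp + 1, propagating that byte through the buffer. Roughly, a
  // 5-byte memset becomes
  //   MVI 0(%DestBase),Byte
  //   MVC 1(4,%DestBase),0(%DestBase)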
8855
8856 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
8857 bool IsImmForm = LengthMO.isImm();
8858 bool IsRegForm = !IsImmForm;
8859
8860 // Build and insert one Opcode covering Length bytes, with special treatment for memset.
8861 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
8863 MachineOperand DBase, uint64_t DDisp,
8865 unsigned Length) -> void {
8866 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
8867 if (IsMemset) {
8868 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
8869 if (ByteMO.isImm())
8870 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
8871 .add(SBase).addImm(SDisp).add(ByteMO);
8872 else
8873 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
8874 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
8875 if (--Length == 0)
8876 return;
8877 }
8878 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
8879 .add(DBase).addImm(DDisp).addImm(Length)
8880 .add(SBase).addImm(SDisp)
8881 .setMemRefs(MI.memoperands());
8882 };
8883
8884 bool NeedsLoop = false;
8885 uint64_t ImmLength = 0;
8886 Register LenAdjReg = SystemZ::NoRegister;
8887 if (IsImmForm) {
8888 ImmLength = LengthMO.getImm();
8889 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
8890 if (ImmLength == 0) {
8891 MI.eraseFromParent();
8892 return MBB;
8893 }
8894 if (Opcode == SystemZ::CLC) {
8895 if (ImmLength > 3 * 256)
8896 // A two-CLC sequence is a clear win over a loop, not least because
8897 // it needs only one branch. A three-CLC sequence needs the same
8898 // number of branches as a loop (i.e. 2), but is shorter. That
8899 // brings us to lengths greater than 768 bytes. It seems relatively
8900 // likely that a difference will be found within the first 768 bytes,
8901 // so we just optimize for the smallest number of branch
8902 // instructions, in order to avoid polluting the prediction buffer
8903 // too much.
8904 NeedsLoop = true;
8905 } else if (ImmLength > 6 * 256)
8906 // The heuristic we use is to prefer loops for anything that would
8907 // require 7 or more MVCs. With these kinds of sizes there isn't much
8908 // to choose between straight-line code and looping code, since the
8909 // time will be dominated by the MVCs themselves.
8910 NeedsLoop = true;
8911 } else {
8912 NeedsLoop = true;
8913 LenAdjReg = LengthMO.getReg();
8914 }
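  // Worked example of the thresholds above (illustrative only): an MVC of
  // 1000 bytes stays straight-line (four MVCs covering 256 + 256 + 256 + 232
  // bytes) since 1000 <= 6 * 256, whereas 2000 bytes would use the loop; a
  // CLC of 1000 bytes exceeds 3 * 256 == 768 and therefore also takes the
  // loop form.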
8915
8916 // When generating more than one CLC, all but the last will need to
8917 // branch to the end when a difference is found.
8918 MachineBasicBlock *EndMBB =
8919 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
8921 : nullptr);
8922
8923 if (NeedsLoop) {
8924 Register StartCountReg =
8925 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8926 if (IsImmForm) {
8927 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
8928 ImmLength &= 255;
8929 } else {
8930 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
8931 .addReg(LenAdjReg)
8932 .addReg(0)
8933 .addImm(8);
8934 }
8935
8936 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
8937 auto loadZeroAddress = [&]() -> MachineOperand {
8938 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8939 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
8940 return MachineOperand::CreateReg(Reg, false);
8941 };
8942 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
8943 DestBase = loadZeroAddress();
8944 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
8945 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
8946
8947 MachineBasicBlock *StartMBB = nullptr;
8948 MachineBasicBlock *LoopMBB = nullptr;
8949 MachineBasicBlock *NextMBB = nullptr;
8950 MachineBasicBlock *DoneMBB = nullptr;
8951 MachineBasicBlock *AllDoneMBB = nullptr;
8952
8953 Register StartSrcReg = forceReg(MI, SrcBase, TII);
8954 Register StartDestReg =
8955 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
8956
8957 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
8958 Register ThisSrcReg = MRI.createVirtualRegister(RC);
8959 Register ThisDestReg =
8960 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
8961 Register NextSrcReg = MRI.createVirtualRegister(RC);
8962 Register NextDestReg =
8963 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
8964 RC = &SystemZ::GR64BitRegClass;
8965 Register ThisCountReg = MRI.createVirtualRegister(RC);
8966 Register NextCountReg = MRI.createVirtualRegister(RC);
8967
8968 if (IsRegForm) {
8969 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8970 StartMBB = SystemZ::emitBlockAfter(MBB);
8971 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8972 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
8973 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
8974
8975 // MBB:
8976 // # Jump to AllDoneMBB if LenAdjReg encodes a length of 0, or fall thru to StartMBB.
8977 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8978 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
8979 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8981 .addMBB(AllDoneMBB);
8982 MBB->addSuccessor(AllDoneMBB);
8983 if (!IsMemset)
8984 MBB->addSuccessor(StartMBB);
8985 else {
8986 // MemsetOneCheckMBB:
8987 // # Jump to MemsetOneMBB for a memset of length 1, or
8988 // # fall thru to StartMBB.
8989 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
8990 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
8991 MBB->addSuccessor(MemsetOneCheckMBB);
8992 MBB = MemsetOneCheckMBB;
8993 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8994 .addReg(LenAdjReg).addImm(-1);
8995 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8997 .addMBB(MemsetOneMBB);
8998 MBB->addSuccessor(MemsetOneMBB, {10, 100});
8999 MBB->addSuccessor(StartMBB, {90, 100});
9000
9001 // MemsetOneMBB:
9002 // # Jump back to AllDoneMBB after a single MVI or STC.
9003 MBB = MemsetOneMBB;
9004 insertMemMemOp(MBB, MBB->end(),
9005 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
9006 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
9007 1);
9008 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
9009 MBB->addSuccessor(AllDoneMBB);
9010 }
9011
9012 // StartMBB:
9013 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
9014 MBB = StartMBB;
9015 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9016 .addReg(StartCountReg).addImm(0);
9017 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9019 .addMBB(DoneMBB);
9020 MBB->addSuccessor(DoneMBB);
9021 MBB->addSuccessor(LoopMBB);
9022 }
9023 else {
9024 StartMBB = MBB;
9025 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9026 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9027 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9028
9029 // StartMBB:
9030 // # fall through to LoopMBB
9031 MBB->addSuccessor(LoopMBB);
9032
9033 DestBase = MachineOperand::CreateReg(NextDestReg, false);
9034 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
9035 if (EndMBB && !ImmLength)
9036 // If the loop handled the whole CLC range, DoneMBB will be empty with
9037 // CC live-through into EndMBB, so add it as live-in.
9038 DoneMBB->addLiveIn(SystemZ::CC);
9039 }
9040
9041 // LoopMBB:
9042 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
9043 // [ %NextDestReg, NextMBB ]
9044 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
9045 // [ %NextSrcReg, NextMBB ]
9046 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
9047 // [ %NextCountReg, NextMBB ]
9048 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
9049 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
9050 // ( JLH EndMBB )
9051 //
9052 // The prefetch is used only for MVC. The JLH is used only for CLC.
9053 MBB = LoopMBB;
9054 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
9055 .addReg(StartDestReg).addMBB(StartMBB)
9056 .addReg(NextDestReg).addMBB(NextMBB);
9057 if (!HaveSingleBase)
9058 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
9059 .addReg(StartSrcReg).addMBB(StartMBB)
9060 .addReg(NextSrcReg).addMBB(NextMBB);
9061 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
9062 .addReg(StartCountReg).addMBB(StartMBB)
9063 .addReg(NextCountReg).addMBB(NextMBB);
9064 if (Opcode == SystemZ::MVC)
9065 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
9067 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
9068 insertMemMemOp(MBB, MBB->end(),
9069 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
9070 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
9071 if (EndMBB) {
9072 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9074 .addMBB(EndMBB);
9075 MBB->addSuccessor(EndMBB);
9076 MBB->addSuccessor(NextMBB);
9077 }
9078
9079 // NextMBB:
9080 // %NextDestReg = LA 256(%ThisDestReg)
9081 // %NextSrcReg = LA 256(%ThisSrcReg)
9082 // %NextCountReg = AGHI %ThisCountReg, -1
9083 // CGHI %NextCountReg, 0
9084 // JLH LoopMBB
9085 // # fall through to DoneMBB
9086 //
9087 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
9088 MBB = NextMBB;
9089 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
9090 .addReg(ThisDestReg).addImm(256).addReg(0);
9091 if (!HaveSingleBase)
9092 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
9093 .addReg(ThisSrcReg).addImm(256).addReg(0);
9094 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
9095 .addReg(ThisCountReg).addImm(-1);
9096 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9097 .addReg(NextCountReg).addImm(0);
9098 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9100 .addMBB(LoopMBB);
9101 MBB->addSuccessor(LoopMBB);
9102 MBB->addSuccessor(DoneMBB);
9103
9104 MBB = DoneMBB;
9105 if (IsRegForm) {
9106 // DoneMBB:
9107 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
9108 // # Use EXecute Relative Long for the remainder of the bytes. The target
9109 // instruction of the EXRL will have a length field of 1 since 0 is an
9110 // illegal value. The number of bytes processed becomes (%LenAdjReg &
9111 // 0xff) + 1.
9112 // # Fall through to AllDoneMBB.
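    // Hypothetical example: for a register-form MVC of 261 bytes, %LenAdjReg
    // carries the length minus one, 260 (0x104); the loop above runs
    // 260 >> 8 == 1 time and copies 256 bytes, and the EXRL-executed MVC
    // takes its length field from the low byte, processing
    // (0x104 & 0xff) + 1 == 5 more bytes, 261 in total.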
9113 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9114 Register RemDestReg = HaveSingleBase ? RemSrcReg
9115 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9116 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
9117 .addReg(StartDestReg).addMBB(StartMBB)
9118 .addReg(NextDestReg).addMBB(NextMBB);
9119 if (!HaveSingleBase)
9120 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
9121 .addReg(StartSrcReg).addMBB(StartMBB)
9122 .addReg(NextSrcReg).addMBB(NextMBB);
9123 if (IsMemset)
9124 insertMemMemOp(MBB, MBB->end(),
9125 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
9126 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
9127 MachineInstrBuilder EXRL_MIB =
9128 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
9129 .addImm(Opcode)
9130 .addReg(LenAdjReg)
9131 .addReg(RemDestReg).addImm(DestDisp)
9132 .addReg(RemSrcReg).addImm(SrcDisp);
9133 MBB->addSuccessor(AllDoneMBB);
9134 MBB = AllDoneMBB;
9135 if (Opcode != SystemZ::MVC) {
9136 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
9137 if (EndMBB)
9138 MBB->addLiveIn(SystemZ::CC);
9139 }
9140 }
9142 }
9143
9144 // Handle any remaining bytes with straight-line code.
9145 while (ImmLength > 0) {
9146 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
9147 // The previous iteration might have created out-of-range displacements.
9148 // Apply them using LA/LAY if so.
9149 foldDisplIfNeeded(DestBase, DestDisp);
9150 foldDisplIfNeeded(SrcBase, SrcDisp);
9151 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
9152 DestDisp += ThisLength;
9153 SrcDisp += ThisLength;
9154 ImmLength -= ThisLength;
9155 // If there's another CLC to go, branch to the end if a difference
9156 // was found.
9157 if (EndMBB && ImmLength > 0) {
9159 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9161 .addMBB(EndMBB);
9162 MBB->addSuccessor(EndMBB);
9163 MBB->addSuccessor(NextMBB);
9164 MBB = NextMBB;
9165 }
9166 }
9167 if (EndMBB) {
9168 MBB->addSuccessor(EndMBB);
9169 MBB = EndMBB;
9170 MBB->addLiveIn(SystemZ::CC);
9171 }
9172
9173 MI.eraseFromParent();
9174 return MBB;
9175}
9176
9177// Decompose string pseudo-instruction MI into a loop that continually performs
9178// Opcode until CC != 3.
9179MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
9180 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9181 MachineFunction &MF = *MBB->getParent();
9182 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9184 DebugLoc DL = MI.getDebugLoc();
9185
9186 uint64_t End1Reg = MI.getOperand(0).getReg();
9187 uint64_t Start1Reg = MI.getOperand(1).getReg();
9188 uint64_t Start2Reg = MI.getOperand(2).getReg();
9189 uint64_t CharReg = MI.getOperand(3).getReg();
9190
9191 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
9192 uint64_t This1Reg = MRI.createVirtualRegister(RC);
9193 uint64_t This2Reg = MRI.createVirtualRegister(RC);
9194 uint64_t End2Reg = MRI.createVirtualRegister(RC);
9195
9196 MachineBasicBlock *StartMBB = MBB;
9198 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9199
9200 // StartMBB:
9201 // # fall through to LoopMBB
9202 MBB->addSuccessor(LoopMBB);
9203
9204 // LoopMBB:
9205 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
9206 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
9207 // R0L = %CharReg
9208 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
9209 // JO LoopMBB
9210 // # fall through to DoneMBB
9211 //
9212 // The load of R0L can be hoisted by post-RA LICM.
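  // These string instructions may stop early after a CPU-determined number of
  // bytes and set CC 3 ("not completed"), so the loop simply re-executes the
  // instruction until CC != 3, as the JO above indicates.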
9213 MBB = LoopMBB;
9214
9215 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
9216 .addReg(Start1Reg).addMBB(StartMBB)
9217 .addReg(End1Reg).addMBB(LoopMBB);
9218 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
9219 .addReg(Start2Reg).addMBB(StartMBB)
9220 .addReg(End2Reg).addMBB(LoopMBB);
9221 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
9222 BuildMI(MBB, DL, TII->get(Opcode))
9223 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
9224 .addReg(This1Reg).addReg(This2Reg);
9225 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9227 MBB->addSuccessor(LoopMBB);
9228 MBB->addSuccessor(DoneMBB);
9229
9230 DoneMBB->addLiveIn(SystemZ::CC);
9231
9232 MI.eraseFromParent();
9233 return DoneMBB;
9234}
9235
9236// Update TBEGIN instruction with final opcode and register clobbers.
9237MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
9238 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
9239 bool NoFloat) const {
9240 MachineFunction &MF = *MBB->getParent();
9241 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9242 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9243
9244 // Update opcode.
9245 MI.setDesc(TII->get(Opcode));
9246
9247 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
9248 // Make sure to add the corresponding GRSM bits if they are missing.
9249 uint64_t Control = MI.getOperand(2).getImm();
9250 static const unsigned GPRControlBit[16] = {
9251 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
9252 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
9253 };
9254 Control |= GPRControlBit[15];
9255 if (TFI->hasFP(MF))
9256 Control |= GPRControlBit[11];
9257 MI.getOperand(2).setImm(Control);
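  // For illustration: adjacent entries share a bit because the save mask
  // works on even/odd register pairs, e.g. GPRControlBit[15] == 0x0100 covers
  // the r14/r15 pair (so the stack pointer is always marked as saved) and
  // GPRControlBit[11] == 0x0400 covers r10/r11 for the frame-pointer case.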
9258
9259 // Add GPR clobbers.
9260 for (int I = 0; I < 16; I++) {
9261 if ((Control & GPRControlBit[I]) == 0) {
9262 unsigned Reg = SystemZMC::GR64Regs[I];
9263 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9264 }
9265 }
9266
9267 // Add FPR/VR clobbers.
9268 if (!NoFloat && (Control & 4) != 0) {
9269 if (Subtarget.hasVector()) {
9270 for (unsigned Reg : SystemZMC::VR128Regs) {
9271 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9272 }
9273 } else {
9274 for (unsigned Reg : SystemZMC::FP64Regs) {
9275 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9276 }
9277 }
9278 }
9279
9280 return MBB;
9281}
9282
9283MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
9284 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9285 MachineFunction &MF = *MBB->getParent();
9287 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9288 DebugLoc DL = MI.getDebugLoc();
9289
9290 Register SrcReg = MI.getOperand(0).getReg();
9291
9292 // Create a new virtual register of the same class as the source.
9293 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
9294 Register DstReg = MRI->createVirtualRegister(RC);
9295
9296 // Replace pseudo with a normal load-and-test that models the def as
9297 // well.
9298 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
9299 .addReg(SrcReg)
9300 .setMIFlags(MI.getFlags());
9301 MI.eraseFromParent();
9302
9303 return MBB;
9304}
9305
9306MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
9308 MachineFunction &MF = *MBB->getParent();
9310 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9311 DebugLoc DL = MI.getDebugLoc();
9312 const unsigned ProbeSize = getStackProbeSize(MF);
9313 Register DstReg = MI.getOperand(0).getReg();
9314 Register SizeReg = MI.getOperand(2).getReg();
9315
9316 MachineBasicBlock *StartMBB = MBB;
9318 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
9319 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
9320 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
9321 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
9322
9325
9326 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9327 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9328
9329 // LoopTestMBB
9330 // BRC TailTestMBB
9331 // # fallthrough to LoopBodyMBB
9332 StartMBB->addSuccessor(LoopTestMBB);
9333 MBB = LoopTestMBB;
9334 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
9335 .addReg(SizeReg)
9336 .addMBB(StartMBB)
9337 .addReg(IncReg)
9338 .addMBB(LoopBodyMBB);
9339 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
9340 .addReg(PHIReg)
9341 .addImm(ProbeSize);
9342 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9344 .addMBB(TailTestMBB);
9345 MBB->addSuccessor(LoopBodyMBB);
9346 MBB->addSuccessor(TailTestMBB);
9347
9348 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
9349 // J LoopTestMBB
9350 MBB = LoopBodyMBB;
9351 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
9352 .addReg(PHIReg)
9353 .addImm(ProbeSize);
9354 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
9355 .addReg(SystemZ::R15D)
9356 .addImm(ProbeSize);
9357 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9358 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
9359 .setMemRefs(VolLdMMO);
9360 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
9361 MBB->addSuccessor(LoopTestMBB);
9362
9363 // TailTestMBB
9364 // BRC DoneMBB
9365 // # fallthrough to TailMBB
9366 MBB = TailTestMBB;
9367 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9368 .addReg(PHIReg)
9369 .addImm(0);
9370 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9372 .addMBB(DoneMBB);
9373 MBB->addSuccessor(TailMBB);
9374 MBB->addSuccessor(DoneMBB);
9375
9376 // TailMBB
9377 // # fallthrough to DoneMBB
9378 MBB = TailMBB;
9379 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
9380 .addReg(SystemZ::R15D)
9381 .addReg(PHIReg);
9382 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9383 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
9384 .setMemRefs(VolLdMMO);
9385 MBB->addSuccessor(DoneMBB);
9386
9387 // DoneMBB
9388 MBB = DoneMBB;
9389 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
9390 .addReg(SystemZ::R15D);
9391
9392 MI.eraseFromParent();
9393 return DoneMBB;
9394}
9395
9396SDValue SystemZTargetLowering::
9397getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
9399 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
9400 SDLoc DL(SP);
9401 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
9402 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
9403}
9404
9407 switch (MI.getOpcode()) {
9408 case SystemZ::ADJCALLSTACKDOWN:
9409 case SystemZ::ADJCALLSTACKUP:
9410 return emitAdjCallStack(MI, MBB);
9411
9412 case SystemZ::Select32:
9413 case SystemZ::Select64:
9414 case SystemZ::Select128:
9415 case SystemZ::SelectF32:
9416 case SystemZ::SelectF64:
9417 case SystemZ::SelectF128:
9418 case SystemZ::SelectVR32:
9419 case SystemZ::SelectVR64:
9420 case SystemZ::SelectVR128:
9421 return emitSelect(MI, MBB);
9422
9423 case SystemZ::CondStore8Mux:
9424 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
9425 case SystemZ::CondStore8MuxInv:
9426 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
9427 case SystemZ::CondStore16Mux:
9428 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
9429 case SystemZ::CondStore16MuxInv:
9430 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
9431 case SystemZ::CondStore32Mux:
9432 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
9433 case SystemZ::CondStore32MuxInv:
9434 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
9435 case SystemZ::CondStore8:
9436 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
9437 case SystemZ::CondStore8Inv:
9438 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
9439 case SystemZ::CondStore16:
9440 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
9441 case SystemZ::CondStore16Inv:
9442 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
9443 case SystemZ::CondStore32:
9444 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
9445 case SystemZ::CondStore32Inv:
9446 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
9447 case SystemZ::CondStore64:
9448 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
9449 case SystemZ::CondStore64Inv:
9450 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
9451 case SystemZ::CondStoreF32:
9452 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
9453 case SystemZ::CondStoreF32Inv:
9454 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
9455 case SystemZ::CondStoreF64:
9456 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
9457 case SystemZ::CondStoreF64Inv:
9458 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
9459
9460 case SystemZ::SCmp128Hi:
9461 return emitICmp128Hi(MI, MBB, false);
9462 case SystemZ::UCmp128Hi:
9463 return emitICmp128Hi(MI, MBB, true);
9464
9465 case SystemZ::PAIR128:
9466 return emitPair128(MI, MBB);
9467 case SystemZ::AEXT128:
9468 return emitExt128(MI, MBB, false);
9469 case SystemZ::ZEXT128:
9470 return emitExt128(MI, MBB, true);
9471
9472 case SystemZ::ATOMIC_SWAPW:
9473 return emitAtomicLoadBinary(MI, MBB, 0);
9474
9475 case SystemZ::ATOMIC_LOADW_AR:
9476 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
9477 case SystemZ::ATOMIC_LOADW_AFI:
9478 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
9479
9480 case SystemZ::ATOMIC_LOADW_SR:
9481 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
9482
9483 case SystemZ::ATOMIC_LOADW_NR:
9484 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
9485 case SystemZ::ATOMIC_LOADW_NILH:
9486 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
9487
9488 case SystemZ::ATOMIC_LOADW_OR:
9489 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
9490 case SystemZ::ATOMIC_LOADW_OILH:
9491 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
9492
9493 case SystemZ::ATOMIC_LOADW_XR:
9494 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
9495 case SystemZ::ATOMIC_LOADW_XILF:
9496 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
9497
9498 case SystemZ::ATOMIC_LOADW_NRi:
9499 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
9500 case SystemZ::ATOMIC_LOADW_NILHi:
9501 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
9502
9503 case SystemZ::ATOMIC_LOADW_MIN:
9504 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
9505 case SystemZ::ATOMIC_LOADW_MAX:
9506 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
9507 case SystemZ::ATOMIC_LOADW_UMIN:
9508 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
9509 case SystemZ::ATOMIC_LOADW_UMAX:
9510 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
9511
9512 case SystemZ::ATOMIC_CMP_SWAPW:
9513 return emitAtomicCmpSwapW(MI, MBB);
9514 case SystemZ::MVCImm:
9515 case SystemZ::MVCReg:
9516 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
9517 case SystemZ::NCImm:
9518 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
9519 case SystemZ::OCImm:
9520 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
9521 case SystemZ::XCImm:
9522 case SystemZ::XCReg:
9523 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
9524 case SystemZ::CLCImm:
9525 case SystemZ::CLCReg:
9526 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
9527 case SystemZ::MemsetImmImm:
9528 case SystemZ::MemsetImmReg:
9529 case SystemZ::MemsetRegImm:
9530 case SystemZ::MemsetRegReg:
9531 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
9532 case SystemZ::CLSTLoop:
9533 return emitStringWrapper(MI, MBB, SystemZ::CLST);
9534 case SystemZ::MVSTLoop:
9535 return emitStringWrapper(MI, MBB, SystemZ::MVST);
9536 case SystemZ::SRSTLoop:
9537 return emitStringWrapper(MI, MBB, SystemZ::SRST);
9538 case SystemZ::TBEGIN:
9539 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
9540 case SystemZ::TBEGIN_nofloat:
9541 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
9542 case SystemZ::TBEGINC:
9543 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
9544 case SystemZ::LTEBRCompare_Pseudo:
9545 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
9546 case SystemZ::LTDBRCompare_Pseudo:
9547 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
9548 case SystemZ::LTXBRCompare_Pseudo:
9549 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
9550
9551 case SystemZ::PROBED_ALLOCA:
9552 return emitProbedAlloca(MI, MBB);
9553
9554 case TargetOpcode::STACKMAP:
9555 case TargetOpcode::PATCHPOINT:
9556 return emitPatchPoint(MI, MBB);
9557
9558 default:
9559 llvm_unreachable("Unexpected instr type to insert");
9560 }
9561}
9562
9563// This is only used by the isel schedulers, and is needed only to prevent
9564 // the compiler from crashing when list-ilp is used.
9565const TargetRegisterClass *
9566SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
9567 if (VT == MVT::Untyped)
9568 return &SystemZ::ADDR128BitRegClass;
9570}
9571
9572SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
9573 SelectionDAG &DAG) const {
9574 SDLoc dl(Op);
9575 /*
9576 The rounding method is in FPC Byte 3 bits 6-7, and has the following
9577 settings:
9578 00 Round to nearest
9579 01 Round to 0
9580 10 Round to +inf
9581 11 Round to -inf
9582
9583 FLT_ROUNDS, on the other hand, expects the following:
9584 -1 Undefined
9585 0 Round to 0
9586 1 Round to nearest
9587 2 Round to +inf
9588 3 Round to -inf
9589 */
9590
9591 // Save FPC to register.
9592 SDValue Chain = Op.getOperand(0);
9593 SDValue EFPC(
9594 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
9595 Chain = EFPC.getValue(1);
9596
9597 // Transform as necessary
9598 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
9599 DAG.getConstant(3, dl, MVT::i32));
9600 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
9601 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
9602 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
9603 DAG.getConstant(1, dl, MVT::i32)));
9604
9605 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
9606 DAG.getConstant(1, dl, MVT::i32));
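  // Worked out for the four FPC settings listed above:
  //   00 -> (0 ^ 0) ^ 1 == 1 (to nearest)   01 -> (1 ^ 0) ^ 1 == 0 (to zero)
  //   10 -> (2 ^ 1) ^ 1 == 2 (to +inf)      11 -> (3 ^ 1) ^ 1 == 3 (to -inf)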
9607 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
9608
9609 return DAG.getMergeValues({RetVal, Chain}, dl);
9610}
9611
9612SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
9613 SelectionDAG &DAG) const {
9614 EVT VT = Op.getValueType();
9615 Op = Op.getOperand(0);
9616 EVT OpVT = Op.getValueType();
9617
9618 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
9619
9620 SDLoc DL(Op);
9621
9622 // load a 0 vector for the third operand of VSUM.
9623 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
9624
9625 // execute VSUM.
9626 switch (OpVT.getScalarSizeInBits()) {
9627 case 8:
9628 case 16:
9629 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
9631 case 32:
9632 case 64:
9633 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
9634 DAG.getBitcast(Op.getValueType(), Zero));
9635 break;
9636 case 128:
9637 break; // VSUM over v1i128 should not happen and would be a noop
9638 default:
9639 llvm_unreachable("Unexpected scalar size.");
9640 }
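  // The VSUM result is an i128 whose low-order part holds the sum; since
  // SystemZ is big-endian, that low-order part is the last element after the
  // bitcast back to OpVT, hence the extract of element NumElements - 1 below.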
9641 // Cast to original vector type, retrieve last element.
9642 return DAG.getNode(
9643 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
9644 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
9645}
unsigned const MachineRegisterInfo * MRI
#define Success
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
amdgpu AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
Function Alias Analysis Results
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:301
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
IRTranslator LLVM IR MI
iv Induction Variable Users
Definition: IVUsers.cpp:48
#define RegName(no)
lazy value info
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
uint64_t High
LLVMContext & Context
#define P(N)
const char LLVMTargetMachineRef TM
static bool isSelectPseudo(MachineInstr &MI)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
#define OPCODE(NAME)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition: APInt.h:300
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
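The APInt helpers listed above are the bit-manipulation primitives used throughout this lowering code. A minimal sketch of how they compose, with illustrative values that are not drawn from the file:

#include "llvm/ADT/APInt.h"
#include <cassert>

// Sketch only: build a mask, widen it, shift it, and read it back out.
static uint64_t apintSketch() {
  llvm::APInt Mask = llvm::APInt::getBitsSet(16, 4, 12); // bits 4..11 set: 0x0ff0
  Mask.setBit(0);                                        // 0x0ff1
  llvm::APInt Wide = Mask.zext(32);                      // zero-extend to 32 bits
  Wide.lshrInPlace(4);                                   // logical shift right: 0x00ff
  assert(Wide.getActiveBits() == 8 && Wide.isSingleWord());
  return Wide.getZExtValue();                            // 255
}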
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
@ Add
*p = old + v
Definition: Instructions.h:764
@ Or
*p = old | v
Definition: Instructions.h:772
@ Sub
*p = old - v
Definition: Instructions.h:766
@ And
*p = old & v
Definition: Instructions.h:768
@ Xor
*p = old ^ v
Definition: Instructions.h:774
BinOp getOperation() const
Definition: Instructions.h:845
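As an illustration of the AtomicRMWInst::BinOp values above, backend code that inspects an atomicrmw typically switches on getOperation(). The classification below is a hedged sketch, not this file's actual expansion policy:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Illustrative only: classify an atomicrmw by its binary operation.
static bool isBitwiseRMW(const AtomicRMWInst *RMW) {
  switch (RMW->getOperation()) {
  case AtomicRMWInst::And:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    return true;   // *p = old & v, old | v, old ^ v
  default:
    return false;  // Add, Sub and the rest are arithmetic updates
  }
}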
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:349
The address of a basic block.
Definition: Constants.h:889
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
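The CCState/CCValAssign entries above belong to the generic calling-convention analysis machinery. A typical consumer walks the assigned locations and dispatches on register versus stack placement; the handlers in this sketch are placeholders:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/CallingConvLower.h"
using namespace llvm;

// Schematic walk over locations produced by AnalyzeFormalArguments or
// AnalyzeCallOperands; real code would emit copies or stack accesses here.
static void walkLocations(ArrayRef<CCValAssign> ArgLocs) {
  for (const CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc()) {
      Register Reg = VA.getLocReg();      // physical register assignment
      (void)Reg;                          // ... copy the value to/from Reg
    } else if (VA.isMemLoc()) {
      int64_t Off = VA.getLocMemOffset(); // byte offset of the stack slot
      (void)Off;                          // ... build a frame-index load/store
    }
  }
}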
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:701
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:713
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:263
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:675
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:563
bool hasPrivateLinkage() const
Definition: GlobalValue.h:527
bool hasInternalLinkage() const
Definition: GlobalValue.h:526
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:184
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
unsigned getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setMaxCallFrameSize(unsigned S)
MachineFunctionProperties & reset(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
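The MachineInstrBuilder methods above form the fluent interface used when emitting MachineInstrs. A minimal sketch of the pattern; the opcode descriptor and operands are placeholders rather than anything taken from this file:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/MC/MCInstrDesc.h"
using namespace llvm;

// BuildMI returns a MachineInstrBuilder; each add* call appends one operand.
static void emitExample(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, const MCInstrDesc &Desc,
                        Register Dst, Register Src, int64_t Imm) {
  BuildMI(MBB, I, DL, Desc, Dst) // defines Dst
      .addReg(Src)               // register use
      .addImm(Imm);              // immediate operand
}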
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr kills the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr fully defines the specified register.
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:721
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:477
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:731
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:827
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:471
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:861
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:472
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:772
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:675
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:767
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:468
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:798
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:844
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:484
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:738
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:553
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
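The long run of SelectionDAG entries above is the node-construction and value-tracking API used throughout this file. A short hedged sketch of the common pattern, with an arbitrary opcode and type chosen purely for illustration:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Build (add X, 42) as an i64 node, then ask a value-tracking question about it.
static SDValue addConstantExample(SelectionDAG &DAG, const SDLoc &DL, SDValue X) {
  SDValue FortyTwo = DAG.getConstant(42, DL, MVT::i64);
  SDValue Sum = DAG.getNode(ISD::ADD, DL, MVT::i64, X, FortyTwo);
  // Are the low 8 bits of Sum provably zero? (Not in general for this node;
  // a combine would use such a fact to drop a following AND, for example.)
  bool LowByteZero = DAG.MaskedValueIsZero(Sum, APInt::getLowBitsSet(64, 8));
  (void)LowByteZero;
  return Sum;
}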
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void reserve(size_type N)
Definition: SmallVector.h:676
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:466
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:257
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:680
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
iterator end() const
Definition: StringRef.h:113
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
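The StringRef and StringSwitch entries above are the string utilities typically used when parsing inline-asm constraints and register names. A hedged sketch of the usual pattern; the accepted strings and return codes are made up for illustration:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

// Map a constraint-like string to a small code; -1 means "unrecognized".
static int classifyConstraint(StringRef C) {
  // Numeric register names such as "r0".."r15": getAsInteger returns true on error.
  unsigned Num = 0;
  if (C.starts_with("r") && !C.slice(1, C.size()).getAsInteger(10, Num))
    return int(Num);
  // Fixed keywords handled with StringSwitch.
  return StringSwitch<int>(C)
      .Case("memory", 100)
      .Case("cc", 101)
      .Default(-1);
}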
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
A SystemZ-specific class detailing special use registers particular for calling conventions.
A SystemZ-specific constant pool value.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const
const TargetFrameLowering * getFrameLowering() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
const SystemZRegisterInfo * getRegisterInfo() const override
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
bool useSoftFloat() const override
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers, particular to z/OS when in 64-bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
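The TargetLoweringBase hooks listed above are the knobs a target's constructor turns to describe operation legality, extending loads, boolean representation, and scheduling. A generic, hedged sketch of the pattern for a hypothetical target; the specific choices below are arbitrary and are not SystemZ's actual settings:

#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

namespace {
// Hypothetical target, shown only to illustrate the hooks above.
struct DemoTargetLowering : public TargetLowering {
  explicit DemoTargetLowering(const TargetMachine &TM) : TargetLowering(TM) {
    // Operation legality: Expand = use generic expansion, Custom = LowerOperation.
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
    setOperationAction(ISD::BR_JT, MVT::i32, Custom);
    // Extending loads / truncating stores the hardware lacks.
    setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
    setTruncStoreAction(MVT::i64, MVT::i1, Expand);
    // Boolean representation and scheduling preference.
    setBooleanContents(ZeroOrOneBooleanContent);
    setSchedulingPreference(Sched::RegPressure);
  }
};
} // end anonymous namespace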
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isFP128Ty() const
Return true if this is 'fp128'.
Definition: Type.h:163
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1126
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1122
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:723
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:476
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1269
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:714
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1155
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1271
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1241
@ STRICT_FCEIL
Definition: ISDOpcodes.h:426
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1272
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1031
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:483
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
Definition: ISDOpcodes.h:1254
@ STRICT_FMINIMUM
Definition: ISDOpcodes.h:436
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:790
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:688
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1228
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1233
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:820
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:477
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1267
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1268
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:411
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1400
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:450
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1221
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:988
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:930
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1077
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:327
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1270
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1056
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:507
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1237
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:627
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1151
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:323
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:430
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1364
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:880
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:651
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1265
@ STRICT_FMAXIMUM
Definition: ISDOpcodes.h:435
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:424
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:425
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1273
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:971
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1041
@ ConstantPool
Definition: ISDOpcodes.h:82
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:809
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:888
@ STRICT_FROUND
Definition: ISDOpcodes.h:428
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:736
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:303
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:449
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:427
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1263
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:443
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:465
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:442
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:984
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1264
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1182
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:470
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1208
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:400
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:524
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1262
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:831
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:423
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1070
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:763
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:493
@ AssertZext
Definition: ISDOpcodes.h:62
@ STRICT_FRINT
Definition: ISDOpcodes.h:422
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1320
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:515
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1523
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1503
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
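The ISD::CondCode helpers above (getSetCCInverse, getSetCCSwappedOperands) encode the algebra of comparison predicates that DAG combines rely on when canonicalizing setcc nodes. A small hedged sketch, purely illustrative:

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// Given (setcc LHS, RHS, CC) on operands of type OpVT, compute the predicate
// for the operand-swapped form and for the negated form; the caller decides
// which canonical shape it wants.
static void normalizePredicates(ISD::CondCode CC, EVT OpVT,
                                ISD::CondCode &Swapped, ISD::CondCode &Inverse) {
  Swapped = ISD::getSetCCSwappedOperands(CC); // (RHS cc' LHS) == (LHS cc RHS)
  Inverse = ISD::getSetCCInverse(CC, OpVT);   // !(LHS cc RHS)
}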
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition: SystemZ.h:40
static bool isImmHH(uint64_t Val)
Definition: SystemZ.h:176
const unsigned CCMASK_TEND
Definition: SystemZ.h:97
const unsigned CCMASK_CS_EQ
Definition: SystemZ.h:67
const unsigned CCMASK_TBEGIN
Definition: SystemZ.h:92
const unsigned CCMASK_0
Definition: SystemZ.h:27
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition: SystemZ.h:82
const unsigned CCMASK_LOGICAL_CARRY
Definition: SystemZ.h:60
const unsigned TDCMASK_NORMAL_MINUS
Definition: SystemZ.h:122
const unsigned CCMASK_TDC
Definition: SystemZ.h:109
const unsigned CCMASK_FCMP
Definition: SystemZ.h:48
const unsigned CCMASK_TM_SOME_0
Definition: SystemZ.h:81
static bool isImmHL(uint64_t Val)
Definition: SystemZ.h:171
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition: SystemZ.h:124
const unsigned PFD_READ
Definition: SystemZ.h:115
const unsigned CCMASK_1
Definition: SystemZ.h:28
const unsigned TDCMASK_NORMAL_PLUS
Definition: SystemZ.h:121
const unsigned PFD_WRITE
Definition: SystemZ.h:116
const unsigned CCMASK_CMP_GT
Definition: SystemZ.h:37
const unsigned TDCMASK_QNAN_MINUS
Definition: SystemZ.h:128
const unsigned CCMASK_CS
Definition: SystemZ.h:69
const unsigned CCMASK_ANY
Definition: SystemZ.h:31
const unsigned CCMASK_ARITH
Definition: SystemZ.h:55
const unsigned CCMASK_TM_MIXED_MSB_0
Definition: SystemZ.h:78
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition: SystemZ.h:123
static bool isImmLL(uint64_t Val)
Definition: SystemZ.h:161
const unsigned VectorBits
Definition: SystemZ.h:154
static bool isImmLH(uint64_t Val)
Definition: SystemZ.h:166
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition: SystemZ.h:125
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition: SystemZ.h:77
const unsigned IPM_CC
Definition: SystemZ.h:112
const unsigned CCMASK_CMP_LE
Definition: SystemZ.h:39
const unsigned CCMASK_CMP_O
Definition: SystemZ.h:44
const unsigned CCMASK_CMP_EQ
Definition: SystemZ.h:35
const unsigned VectorBytes
Definition: SystemZ.h:158
const unsigned TDCMASK_INFINITY_MINUS
Definition: SystemZ.h:126
const unsigned CCMASK_ICMP
Definition: SystemZ.h:47
const unsigned CCMASK_VCMP_ALL
Definition: SystemZ.h:101
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition: SystemZ.h:104
const unsigned CCMASK_TM_MIXED_MSB_1
Definition: SystemZ.h:79
const unsigned CCMASK_TM_MSB_0
Definition: SystemZ.h:83
const unsigned CCMASK_ARITH_OVERFLOW
Definition: SystemZ.h:54
const unsigned CCMASK_CS_NE
Definition: SystemZ.h:68
const unsigned TDCMASK_SNAN_PLUS
Definition: SystemZ.h:129
const unsigned CCMASK_TM
Definition: SystemZ.h:85
const unsigned CCMASK_3
Definition: SystemZ.h:30
const unsigned CCMASK_CMP_LT
Definition: SystemZ.h:36
const unsigned CCMASK_CMP_NE
Definition: SystemZ.h:38
const unsigned TDCMASK_ZERO_PLUS
Definition: SystemZ.h:119
const unsigned TDCMASK_QNAN_PLUS
Definition: SystemZ.h:127
const unsigned TDCMASK_ZERO_MINUS
Definition: SystemZ.h:120
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition: SystemZ.h:80
const unsigned CCMASK_LOGICAL_BORROW
Definition: SystemZ.h:62
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition: SystemZ.h:43
const unsigned CCMASK_LOGICAL
Definition: SystemZ.h:64
const unsigned CCMASK_TM_MSB_1
Definition: SystemZ.h:84
const unsigned TDCMASK_SNAN_MINUS
Definition: SystemZ.h:130
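The SystemZ::CCMASK_* and TDCMASK_* constants above are bitmasks over the four condition-code values (CC 0-3) and the test-data-class bits. As a hedged illustration written from the constant names alone, mapping a signed integer predicate onto a CC mask conventionally looks like this; unsigned and unordered predicates are handled separately in real code:

#include "SystemZ.h"
#include "llvm/CodeGen/ISDOpcodes.h"
using namespace llvm;

// Illustrative: the CC mask under which "LHS pred RHS" holds after a compare.
static unsigned ccMaskForSignedPred(ISD::CondCode CC) {
  switch (CC) {
  case ISD::SETEQ: return SystemZ::CCMASK_CMP_EQ;
  case ISD::SETNE: return SystemZ::CCMASK_CMP_NE;
  case ISD::SETLT: return SystemZ::CCMASK_CMP_LT;
  case ISD::SETLE: return SystemZ::CCMASK_CMP_LE;
  case ISD::SETGT: return SystemZ::CCMASK_CMP_GT;
  case ISD::SETGE: return SystemZ::CCMASK_CMP_GE;
  default:         return 0; // not a signed integer predicate
  }
}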
@ GeneralDynamic
Definition: CodeGen.h:46
@ GS
Definition: X86.h:205
Reg
All possible values of the reg field in the ModR/M byte.
support::ulittle32_t Word
Definition: IRSymtab.h:52
NodeAddr< CodeNode * > Code
Definition: RDFGraph.h:388
constexpr const char32_t SBase
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:326
@ Offset
Definition: DWP.cpp:456
@ Length
Definition: DWP.cpp:456
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Done
Definition: Threading.h:61
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
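make_range simply packages an iterator pair so that range-for can consume it. A self-contained usage example (the vector and bounds are arbitrary):

#include "llvm/ADT/iterator_range.h"
#include <cassert>
#include <vector>

int main() {
  std::vector<int> V = {1, 2, 3, 4, 5};
  int Sum = 0;
  // Iterate the sub-range [V.begin() + 1, V.end() - 1) without copying it out.
  for (int X : llvm::make_range(V.begin() + 1, V.end() - 1))
    Sum += X;
  assert(Sum == 2 + 3 + 4);
  return 0;
}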
void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert ‘Bytes’ to a hex string and output to ‘OS’.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
int countl_zero(T Val)
Count the number of 0s from the most significant bit to the least significant, stopping at the first 1.
Definition: bit.h:281
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
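The bit-manipulation helpers referenced above (Log2_32_Ceil, bit_ceil, countr_zero, countl_zero, isPowerOf2_32) live in MathExtras.h and bit.h. A small sanity-check example of the values they return; the inputs are arbitrary.

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

int main() {
  assert(llvm::isPowerOf2_32(64));                  // power of two > 0
  assert(llvm::Log2_32_Ceil(33) == 6);              // smallest n with (1u << n) >= 33
  assert(llvm::bit_ceil(33u) == 64u);               // next power of two >= 33
  assert(llvm::countr_zero(uint32_t(0x50)) == 4);   // 0b1010000 has 4 trailing zeros
  assert(llvm::countl_zero(uint32_t(0x50)) == 25);  // and 25 leading zeros in 32 bits
  return 0;
}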
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ Mul
Product of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:452
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
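SignExtend64 and bit_floor round out that group: the first interprets the low B bits of a value as a signed quantity, the second rounds down to a power of two. A short illustration with arbitrarily chosen values:

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  // Treat the low 12 bits as a signed field: 0x800 has its sign bit set.
  assert(llvm::SignExtend64<12>(0x800) == -2048);
  assert(llvm::SignExtend64<16>(0x7fff) == 32767);
  // Largest power of two not exceeding the argument.
  assert(llvm::bit_floor(33u) == 32u);
  return 0;
}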
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define EQ(a, b)
Definition: regexec.c:112
#define NC
Definition: regutils.h:42
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
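The EVT queries listed above can be exercised in isolation. A minimal sketch that builds a 4 x i32 vector type and checks the corresponding properties (assumes an LLVM build environment):

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>

int main() {
  llvm::LLVMContext Ctx;
  llvm::EVT VT = llvm::EVT::getVectorVT(Ctx, llvm::MVT::i32, 4);
  assert(VT.isVector() && VT.isInteger() && !VT.isFloatingPoint());
  assert(VT.getVectorNumElements() == 4);
  assert(VT.getVectorElementType() == llvm::MVT::i32);
  assert(VT.getScalarSizeInBits() == 32);
  assert(VT.getSizeInBits() == 128);     // 4 x 32 bits
  assert(VT.getStoreSize() == 16);       // bytes written by a store of this type
  assert(VT.isSimple() && VT.getSimpleVT() == llvm::MVT::v4i32);
  return 0;
}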
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:182
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:141
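KnownBits tracks which bits of a value are provably zero or one. A small example of the interface entries above; the concrete bit pattern is arbitrary.

#include "llvm/Support/KnownBits.h"
#include <cassert>

int main() {
  // An 8-bit value whose top four bits are known to be zero.
  llvm::KnownBits Known(8);
  Known.Zero.setHighBits(4);
  assert(Known.getBitWidth() == 8);
  assert(Known.getMaxValue() == 15);       // 0b00001111 is the largest possible value
  llvm::KnownBits Wide = Known.zext(16);   // zero extension keeps the new high bits known zero
  assert(Wide.getBitWidth() == 16);
  assert(Wide.getMaxValue() == 15);
  Known.resetAll();                        // back to "nothing known"
  assert(Known.getMaxValue() == 255);
  return 0;
}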
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
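The CallLoweringInfo setters above are designed to chain, which is how targets typically populate a call descriptor before handing it to LowerCallTo. A hedged sketch of that pattern follows; the helper name emitSimpleCall and its parameters are hypothetical and not part of this file, and the fragment is meant to compile against LLVM headers rather than run standalone.

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include <utility>

// Hypothetical helper: build and lower a plain C call to Callee.  Assumes the
// caller already has the SelectionDAG, the current chain, and the argument list.
static std::pair<llvm::SDValue, llvm::SDValue>
emitSimpleCall(llvm::SelectionDAG &DAG, const llvm::SDLoc &DL,
               llvm::SDValue Chain, llvm::SDValue Callee, llvm::Type *RetTy,
               llvm::TargetLowering::ArgListTy Args) {
  const llvm::TargetLowering &TLI = DAG.getTargetLoweringInfo();
  llvm::TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setCallee(llvm::CallingConv::C, RetTy, Callee, std::move(Args))
      .setSExtResult(false)
      .setZExtResult(false);
  // Returns {call result, new chain}.
  return TLI.LowerCallTo(CLI);
}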