1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
24#include "llvm/IR/Intrinsics.h"
25#include "llvm/IR/IntrinsicsS390.h"
29#include <cctype>
30#include <optional>
31
32using namespace llvm;
33
34#define DEBUG_TYPE "systemz-lower"
35
36namespace {
37// Represents information about a comparison.
38struct Comparison {
39 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
40 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
41 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
42
43 // The operands to the comparison.
44 SDValue Op0, Op1;
45
46 // Chain if this is a strict floating-point comparison.
47 SDValue Chain;
48
49 // The opcode that should be used to compare Op0 and Op1.
50 unsigned Opcode;
51
52 // A SystemZICMP value. Only used for integer comparisons.
53 unsigned ICmpType;
54
55 // The mask of CC values that Opcode can produce.
56 unsigned CCValid;
57
58 // The mask of CC values for which the original condition is true.
59 unsigned CCMask;
60};
61} // end anonymous namespace
62
63// Classify VT as either 32 or 64 bit.
64static bool is32Bit(EVT VT) {
65 switch (VT.getSimpleVT().SimpleTy) {
66 case MVT::i32:
67 return true;
68 case MVT::i64:
69 return false;
70 default:
71 llvm_unreachable("Unsupported type");
72 }
73}
74
75// Return a version of MachineOperand that can be safely used before the
76// final use.
78 if (Op.isReg())
79 Op.setIsKill(false);
80 return Op;
81}
82
84 const SystemZSubtarget &STI)
85 : TargetLowering(TM), Subtarget(STI) {
86 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
87
88 auto *Regs = STI.getSpecialRegisters();
89
90 // Set up the register classes.
91 if (Subtarget.hasHighWord())
92 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
93 else
94 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
95 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
96 if (!useSoftFloat()) {
97 if (Subtarget.hasVector()) {
98 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
99 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
100 } else {
101 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
102 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
103 }
104 if (Subtarget.hasVectorEnhancements1())
105 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
106 else
107 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
108
109 if (Subtarget.hasVector()) {
110 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
111 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
112 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
113 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
114 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
115 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
116 }
117
118 if (Subtarget.hasVector())
119 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
120 }
121
122 // Compute derived properties from the register classes
124
125 // Set up special registers.
126 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
127
128 // TODO: It may be better to default to latency-oriented scheduling; however,
129 // LLVM's current latency-oriented scheduler can't handle physreg definitions
130 // such as SystemZ has with CC, so set this to the register-pressure
131 // scheduler, because it can.
133
136
138
139 // Instructions are strings of 2-byte aligned 2-byte values.
141 // For performance reasons we prefer 16-byte alignment.
143
144 // Handle operations that are handled in a similar way for all types.
145 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
146 I <= MVT::LAST_FP_VALUETYPE;
147 ++I) {
149 if (isTypeLegal(VT)) {
150 // Lower SET_CC into an IPM-based sequence.
154
155 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
157
158 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
161 }
162 }
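  // As a rough illustration of the IPM-based SETCC sequence mentioned above
  // (mnemonics only; the exact code depends on the condition and type):
  //   cr   %r2, %r3     # compare, sets the condition code (CC)
  //   ipm  %r0          # low word of %r0 becomes CC << 28 (program mask below)
  //   <shift/rotate>    # reduce "CC is in the wanted mask" to a 0/1 result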
163
164 // Expand jump table branches as address arithmetic followed by an
165 // indirect jump.
167
168 // Expand BRCOND into a BR_CC (see above).
170
171 // Handle integer types except i128.
172 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
173 I <= MVT::LAST_INTEGER_VALUETYPE;
174 ++I) {
176 if (isTypeLegal(VT) && VT != MVT::i128) {
178
179 // Expand individual DIV and REMs into DIVREMs.
186
187 // Support addition/subtraction with overflow.
190
191 // Support addition/subtraction with carry.
194
195 // Support carry in as value rather than glue.
198
199 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
200 // available, or if the operand is constant.
202
203 // Use POPCNT on z196 and above.
204 if (Subtarget.hasPopulationCount())
206 else
208
209 // No special instructions for these.
212
213 // Use *MUL_LOHI where possible instead of MULH*.
218
219 // Only z196 and above have native support for conversions to unsigned.
220 // On z10, promoting to i64 doesn't generate an inexact condition for
221 // values that are outside the i32 range but in the i64 range, so use
222 // the default expansion.
223 if (!Subtarget.hasFPExtension())
225
226 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
227 // default to Expand, so need to be modified to Legal where appropriate.
229 if (Subtarget.hasFPExtension())
231
232 // And similarly for STRICT_[SU]INT_TO_FP.
234 if (Subtarget.hasFPExtension())
236 }
237 }
238
239 // Handle i128 if legal.
240 if (isTypeLegal(MVT::i128)) {
241 // No special instructions for these.
257
258 // Support addition/subtraction with carry.
263
264 // Use VPOPCT and add up partial results.
266
267 // We have to use libcalls for these.
276 }
277
278 // Type legalization will convert 8- and 16-bit atomic operations into
279 // forms that operate on i32s (but still keeping the original memory VT).
280 // Lower them into full i32 operations.
292
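  // Roughly, an 8- or 16-bit atomic read-modify-write becomes a
  // compare-and-swap loop on the containing aligned 32-bit word, e.g.:
  //   l    %old, 0(%aligned)         # word containing the subword
  //   loop: compute %new = %old with the 8/16-bit field updated
  //   cs   %old, %new, 0(%aligned)   # on failure %old is refreshed; retry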
293 // Whether or not i128 is a legal type, we need to custom lower
294 // the atomic operations in order to exploit SystemZ instructions.
297
298 // Mark sign/zero extending atomic loads as legal, which will make
299 // DAGCombiner fold extensions into atomic loads if possible.
301 {MVT::i8, MVT::i16, MVT::i32}, Legal);
303 {MVT::i8, MVT::i16}, Legal);
305 MVT::i8, Legal);
306
307 // We can use the CC result of compare-and-swap to implement
308 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
312
314
315 // Traps are legal, as we will convert them to "j .+2".
316 setOperationAction(ISD::TRAP, MVT::Other, Legal);
317
318 // z10 has instructions for signed but not unsigned FP conversion.
319 // Handle unsigned 32-bit types as signed 64-bit types.
320 if (!Subtarget.hasFPExtension()) {
325 }
326
327 // We have native support for a 64-bit CTLZ, via FLOGR.
331
332 // On z15 we have native support for a 64-bit CTPOP.
333 if (Subtarget.hasMiscellaneousExtensions3()) {
336 }
337
338 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
340
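  // For example, (or (zext i32 %lo), (shl (anyext i32 %hi), 32)) can be built
  // by inserting %lo and %hi into the low and high subregisters of a single
  // GR64 instead of materializing both halves and using OGR (a rough sketch;
  // the real check is on the known-zero bits of the operands).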
341 // Expand 128 bit shifts without using a libcall.
345 setLibcallName(RTLIB::SRL_I128, nullptr);
346 setLibcallName(RTLIB::SHL_I128, nullptr);
347 setLibcallName(RTLIB::SRA_I128, nullptr);
348
349 // Also expand 256 bit shifts if i128 is a legal type.
350 if (isTypeLegal(MVT::i128)) {
354 }
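  // With the shift libcalls removed above, an i128 shift is expanded in terms
  // of 64-bit pieces, roughly lo' = (lo >> amt) | (hi << (64 - amt)) and
  // hi' = hi >> amt, with a select for amounts of 64 or more; when i128 lives
  // in a vector register, the full 128-bit shift instructions (VSLB/VSL and
  // friends) can be used instead.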
355
356 // Handle bitcast from fp128 to i128.
357 if (!isTypeLegal(MVT::i128))
359
360 // We have native instructions for i8, i16 and i32 extensions, but not i1.
362 for (MVT VT : MVT::integer_valuetypes()) {
366 }
367
368 // Handle the various types of symbolic address.
374
375 // We need to handle dynamic allocations specially because of the
376 // 160-byte area at the bottom of the stack.
379
382
383 // Handle prefetches with PFD or PFDRL.
385
386 // Handle readcyclecounter with STCKF.
388
390 // Assume by default that all vector operations need to be expanded.
391 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
392 if (getOperationAction(Opcode, VT) == Legal)
393 setOperationAction(Opcode, VT, Expand);
394
395 // Likewise all truncating stores and extending loads.
396 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
397 setTruncStoreAction(VT, InnerVT, Expand);
400 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
401 }
402
403 if (isTypeLegal(VT)) {
404 // These operations are legal for anything that can be stored in a
405 // vector register, even if there is no native support for the format
406 // as such. In particular, we can do these for v4f32 even though there
407 // are no specific instructions for that format.
413
414 // Likewise, except that we need to replace the nodes with something
415 // more specific.
418 }
419 }
420
421 // Handle integer vector types.
423 if (isTypeLegal(VT)) {
424 // These operations have direct equivalents.
429 if (VT != MVT::v2i64)
435 if (Subtarget.hasVectorEnhancements1())
437 else
441
442 // Convert a GPR scalar to a vector by inserting it into element 0.
444
445 // Use a series of unpacks for extensions.
448
449 // Detect shifts/rotates by a scalar amount and convert them into
450 // V*_BY_SCALAR.
455
456 // Add ISD::VECREDUCE_ADD as custom in order to implement
457 // it with VZERO+VSUM
459
460 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
461 // and inverting the result as necessary.
463 }
464 }
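  // For the SETCC mapping above, for example: setcc eq a, b -> VCE a, b;
  // setcc ugt a, b -> VCHL a, b; setcc ult a, b -> VCHL b, a (operands
  // swapped); and setcc ne a, b -> VCE a, b followed by inverting the
  // result mask.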
465
466 if (Subtarget.hasVector()) {
467 // There should be no need to check for float types other than v2f64
468 // since <2 x f32> isn't a legal type.
477
486 }
487
488 if (Subtarget.hasVectorEnhancements2()) {
497
506 }
507
508 // Handle floating-point types.
509 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
510 I <= MVT::LAST_FP_VALUETYPE;
511 ++I) {
513 if (isTypeLegal(VT)) {
514 // We can use FI for FRINT.
516
517 // We can use the extended form of FI for other rounding operations.
518 if (Subtarget.hasFPExtension()) {
524 }
525
526 // No special instructions for these.
532
533 // Special treatment.
535
536 // Handle constrained floating-point operations.
546 if (Subtarget.hasFPExtension()) {
552 }
553 }
554 }
555
556 // Handle floating-point vector types.
557 if (Subtarget.hasVector()) {
558 // Scalar-to-vector conversion is just a subreg.
561
562 // Some insertions and extractions can be done directly but others
563 // need to go via integers.
568
569 // These operations have direct equivalents.
570 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
571 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
572 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
573 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
574 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
575 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
576 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
577 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
578 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
581 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
584
585 // Handle constrained floating-point operations.
598
603 if (Subtarget.hasVectorEnhancements1()) {
606 }
607 }
608
609 // The vector enhancements facility 1 has instructions for these.
610 if (Subtarget.hasVectorEnhancements1()) {
611 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
612 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
613 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
614 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
615 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
616 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
617 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
618 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
619 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
622 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
625
630
635
640
645
650
651 // Handle constrained floating-point operations.
664 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
665 MVT::v4f32, MVT::v2f64 }) {
670 }
671 }
672
673 // We only have fused f128 multiply-addition on vector registers.
674 if (!Subtarget.hasVectorEnhancements1()) {
677 }
678
679 // We don't have a copysign instruction on vector registers.
680 if (Subtarget.hasVectorEnhancements1())
682
683 // Needed so that we don't try to implement f128 constant loads using
684 // a load-and-extend of an f80 constant (in cases where the constant
685 // would fit in an f80).
686 for (MVT VT : MVT::fp_valuetypes())
687 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
688
689 // We don't have extending load instructions on vector registers.
690 if (Subtarget.hasVectorEnhancements1()) {
691 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
692 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
693 }
694
695 // Floating-point truncation and stores need to be done separately.
696 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
697 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
698 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
699
700 // We have 64-bit FPR<->GPR moves, but need special handling for
701 // 32-bit forms.
702 if (!Subtarget.hasVector()) {
705 }
706
707 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
708 // structure, but VAEND is a no-op.
712
714
715 // Codes for which we want to perform some z-specific combinations.
719 ISD::LOAD,
730 ISD::SDIV,
731 ISD::UDIV,
732 ISD::SREM,
733 ISD::UREM,
736
737 // Handle intrinsics.
740
741 // We want to use MVC in preference to even a single load/store pair.
742 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
744
745 // The main memset sequence is a byte store followed by an MVC.
746 // Two STC or MV..I stores win over that, but the kind of fused stores
747 // generated by target-independent code don't when the byte value is
748 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
749 // than "STC;MVC". Handle the choice in target-specific code instead.
750 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
752
753 // Default to having -disable-strictnode-mutation on
754 IsStrictFPEnabled = true;
755
756 if (Subtarget.isTargetzOS()) {
757 struct RTLibCallMapping {
758 RTLIB::Libcall Code;
759 const char *Name;
760 };
761 static RTLibCallMapping RTLibCallCommon[] = {
762#define HANDLE_LIBCALL(code, name) {RTLIB::code, name},
763#include "ZOSLibcallNames.def"
764 };
765 for (auto &E : RTLibCallCommon)
766 setLibcallName(E.Code, E.Name);
767 }
768}
769
771 return Subtarget.hasSoftFloat();
772}
773
775 LLVMContext &, EVT VT) const {
776 if (!VT.isVector())
777 return MVT::i32;
779}
780
782 const MachineFunction &MF, EVT VT) const {
783 VT = VT.getScalarType();
784
785 if (!VT.isSimple())
786 return false;
787
788 switch (VT.getSimpleVT().SimpleTy) {
789 case MVT::f32:
790 case MVT::f64:
791 return true;
792 case MVT::f128:
793 return Subtarget.hasVectorEnhancements1();
794 default:
795 break;
796 }
797
798 return false;
799}
800
801// Return true if the constant can be generated with a vector instruction,
802// such as VGM, VGMB or VREPI.
804 const SystemZSubtarget &Subtarget) {
805 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
806 if (!Subtarget.hasVector() ||
807 (isFP128 && !Subtarget.hasVectorEnhancements1()))
808 return false;
809
810 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
811 // preferred way of creating all-zero and all-one vectors so give it
812 // priority over other methods below.
813 unsigned Mask = 0;
814 unsigned I = 0;
815 for (; I < SystemZ::VectorBytes; ++I) {
816 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
817 if (Byte == 0xff)
818 Mask |= 1ULL << I;
819 else if (Byte != 0)
820 break;
821 }
822 if (I == SystemZ::VectorBytes) {
824 OpVals.push_back(Mask);
826 return true;
827 }
828
829 if (SplatBitSize > 64)
830 return false;
831
832 auto tryValue = [&](uint64_t Value) -> bool {
833 // Try VECTOR REPLICATE IMMEDIATE
834 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
835 if (isInt<16>(SignedValue)) {
836 OpVals.push_back(((unsigned) SignedValue));
839 SystemZ::VectorBits / SplatBitSize);
840 return true;
841 }
842 // Try VECTOR GENERATE MASK
843 unsigned Start, End;
844 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
845 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
846 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
847 // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
848 OpVals.push_back(Start - (64 - SplatBitSize));
849 OpVals.push_back(End - (64 - SplatBitSize));
852 SystemZ::VectorBits / SplatBitSize);
853 return true;
854 }
855 return false;
856 };
857
858 // First try assuming that any undefined bits above the highest set bit
859 // and below the lowest set bit are 1s. This increases the likelihood of
860 // being able to use a sign-extended element value in VECTOR REPLICATE
861 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
862 uint64_t SplatBitsZ = SplatBits.getZExtValue();
863 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
864 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
865 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
866 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
867 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
868 if (tryValue(SplatBitsZ | Upper | Lower))
869 return true;
870
871 // Now try assuming that any undefined bits between the first and
872 // last defined set bits are set. This increases the chances of
873 // using a non-wraparound mask.
874 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
875 return tryValue(SplatBitsZ | Middle);
876}
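// Two small worked examples of the splat handling above (illustrative values
// only): a 16-bit splat of 0xfffe sign-extends to -2, which fits in 16 bits,
// so it becomes a VECTOR REPLICATE IMMEDIATE of -2 with 8 elements; a 32-bit
// splat of 0x00ffff00 is a contiguous run of ones (bits 8-23 in MSB-first
// numbering), so it becomes a VECTOR GENERATE MASK with that start/end pair.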
877
879 if (IntImm.isSingleWord()) {
880 IntBits = APInt(128, IntImm.getZExtValue());
881 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
882 } else
883 IntBits = IntImm;
884 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
885
886 // Find the smallest splat.
887 SplatBits = IntImm;
888 unsigned Width = SplatBits.getBitWidth();
889 while (Width > 8) {
890 unsigned HalfSize = Width / 2;
891 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
892 APInt LowValue = SplatBits.trunc(HalfSize);
893
894 // If the two halves do not match, stop here.
895 if (HighValue != LowValue || 8 > HalfSize)
896 break;
897
898 SplatBits = HighValue;
899 Width = HalfSize;
900 }
901 SplatUndef = 0;
902 SplatBitSize = Width;
903}
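// For example, a 128-bit immediate consisting of the 16-bit pattern 0x0001
// repeated eight times halves successfully from 128 down to 16 bits (the two
// halves keep matching), so SplatBitSize ends up as 16 and SplatBits as
// 0x0001.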
904
906 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
907 bool HasAnyUndefs;
908
909 // Get IntBits by finding the 128 bit splat.
910 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
911 true);
912
913 // Get SplatBits by finding the 8 bit or greater splat.
914 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
915 true);
916}
917
919 bool ForCodeSize) const {
920 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
921 if (Imm.isZero() || Imm.isNegZero())
922 return true;
923
925}
926
927/// Returns true if stack probing through inline assembly is requested.
929 // If the function specifically requests inline stack probes, emit them.
930 if (MF.getFunction().hasFnAttribute("probe-stack"))
931 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
932 "inline-asm";
933 return false;
934}
935
938 // Lower fp128 the same way as i128.
939 if (LI->getType()->isFP128Ty())
942}
943
946 // Lower fp128 the same way as i128.
947 if (SI->getValueOperand()->getType()->isFP128Ty())
950}
951
954 // Don't expand subword operations as they require special treatment.
955 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
957
958 // Don't expand if there is a target instruction available.
959 if (Subtarget.hasInterlockedAccess1() &&
960 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
967
969}
970
972 // We can use CGFI or CLGFI.
973 return isInt<32>(Imm) || isUInt<32>(Imm);
974}
975
977 // We can use ALGFI or SLGFI.
978 return isUInt<32>(Imm) || isUInt<32>(-Imm);
979}
980
982 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
983 // Unaligned accesses should never be slower than the expanded version.
984 // We check specifically for aligned accesses in the few cases where
985 // they are required.
986 if (Fast)
987 *Fast = 1;
988 return true;
989}
990
991// Information about the addressing mode for a memory access.
993 // True if a long displacement is supported.
995
996 // True if use of index register is supported.
998
999 AddressingMode(bool LongDispl, bool IdxReg) :
1000 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
1001};
1002
1003// Return the desired addressing mode for a Load which has only one use (in
1004// the same block) which is a Store.
1006 Type *Ty) {
1007 // With vector support a Load->Store combination may be combined to either
1008 // an MVC or vector operations and it seems to work best to allow the
1009 // vector addressing mode.
1010 if (HasVector)
1011 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1012
1013 // Otherwise only the MVC case is special.
1014 bool MVC = Ty->isIntegerTy(8);
1015 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1016}
1017
1018// Return the addressing mode which seems most desirable given an LLVM
1019// Instruction pointer.
1020static AddressingMode
1022 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1023 switch (II->getIntrinsicID()) {
1024 default: break;
1025 case Intrinsic::memset:
1026 case Intrinsic::memmove:
1027 case Intrinsic::memcpy:
1028 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1029 }
1030 }
1031
1032 if (isa<LoadInst>(I) && I->hasOneUse()) {
1033 auto *SingleUser = cast<Instruction>(*I->user_begin());
1034 if (SingleUser->getParent() == I->getParent()) {
1035 if (isa<ICmpInst>(SingleUser)) {
1036 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1037 if (C->getBitWidth() <= 64 &&
1038 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1039 // Comparison of memory with 16 bit signed / unsigned immediate
1040 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1041 } else if (isa<StoreInst>(SingleUser))
1042 // Load->Store
1043 return getLoadStoreAddrMode(HasVector, I->getType());
1044 }
1045 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1046 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1047 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1048 // Load->Store
1049 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1050 }
1051
1052 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1053
1054 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1055 // dependencies (LDE only supports small offsets).
1056 // * Utilize the vector registers to hold floating point
1057 // values (vector load / store instructions only support small
1058 // offsets).
1059
1060 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1061 I->getOperand(0)->getType());
1062 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1063 bool IsVectorAccess = MemAccessTy->isVectorTy();
1064
1065 // A store of an extracted vector element will be combined into a VSTE type
1066 // instruction.
1067 if (!IsVectorAccess && isa<StoreInst>(I)) {
1068 Value *DataOp = I->getOperand(0);
1069 if (isa<ExtractElementInst>(DataOp))
1070 IsVectorAccess = true;
1071 }
1072
1073 // A load which gets inserted into a vector element will be combined into a
1074 // VLE type instruction.
1075 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1076 User *LoadUser = *I->user_begin();
1077 if (isa<InsertElementInst>(LoadUser))
1078 IsVectorAccess = true;
1079 }
1080
1081 if (IsFPAccess || IsVectorAccess)
1082 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1083 }
1084
1085 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1086}
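// For example, a load whose only user is a comparison against a 16-bit
// immediate can be folded into a compare-immediate-with-memory instruction
// (e.g. CHSI/CLFHSI); those only take a base register and a short unsigned
// displacement, which is why that case returns the (no long displacement,
// no index register) mode above.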
1087
1089 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1090 // Punt on globals for now, although they can be used in limited
1091 // RELATIVE LONG cases.
1092 if (AM.BaseGV)
1093 return false;
1094
1095 // Require a 20-bit signed offset.
1096 if (!isInt<20>(AM.BaseOffs))
1097 return false;
1098
1099 bool RequireD12 =
1100 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1101 AddressingMode SupportedAM(!RequireD12, true);
1102 if (I != nullptr)
1103 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1104
1105 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1106 return false;
1107
1108 if (!SupportedAM.IndexReg)
1109 // No indexing allowed.
1110 return AM.Scale == 0;
1111 else
1112 // Indexing is OK but no scale factor can be applied.
1113 return AM.Scale == 0 || AM.Scale == 1;
1114}
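// For example, a v4i32 access with a base register and an offset of 8000 is
// rejected here because vector loads and stores only have a 12-bit unsigned
// displacement, whereas the same address is fine for a plain i64 access,
// which has the long 20-bit signed displacement forms.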
1115
1117 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
1118 unsigned SrcAS, const AttributeList &FuncAttributes) const {
1119 const int MVCFastLen = 16;
1120
1121 if (Limit != ~unsigned(0)) {
1122 // Don't expand Op into scalar loads/stores in these cases:
1123 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1124 return false; // Small memcpy: Use MVC
1125 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1126 return false; // Small memset (first byte with STC/MVI): Use MVC
1127 if (Op.isZeroMemset())
1128 return false; // Memset zero: Use XC
1129 }
1130
1131 return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
1132 SrcAS, FuncAttributes);
1133}
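// For example, a 16-byte memcpy is better done as a single
//   MVC 0(16,%r1),0(%r2)
// than as two 8-byte load/store pairs, which is why the small cases above
// decline the generic scalar expansion.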
1134
1136 const AttributeList &FuncAttributes) const {
1137 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1138}
1139
1140bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1141 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1142 return false;
1143 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1144 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1145 return FromBits > ToBits;
1146}
1147
1149 if (!FromVT.isInteger() || !ToVT.isInteger())
1150 return false;
1151 unsigned FromBits = FromVT.getFixedSizeInBits();
1152 unsigned ToBits = ToVT.getFixedSizeInBits();
1153 return FromBits > ToBits;
1154}
1155
1156//===----------------------------------------------------------------------===//
1157// Inline asm support
1158//===----------------------------------------------------------------------===//
1159
1162 if (Constraint.size() == 1) {
1163 switch (Constraint[0]) {
1164 case 'a': // Address register
1165 case 'd': // Data register (equivalent to 'r')
1166 case 'f': // Floating-point register
1167 case 'h': // High-part register
1168 case 'r': // General-purpose register
1169 case 'v': // Vector register
1170 return C_RegisterClass;
1171
1172 case 'Q': // Memory with base and unsigned 12-bit displacement
1173 case 'R': // Likewise, plus an index
1174 case 'S': // Memory with base and signed 20-bit displacement
1175 case 'T': // Likewise, plus an index
1176 case 'm': // Equivalent to 'T'.
1177 return C_Memory;
1178
1179 case 'I': // Unsigned 8-bit constant
1180 case 'J': // Unsigned 12-bit constant
1181 case 'K': // Signed 16-bit constant
1182 case 'L': // Signed 20-bit displacement (on all targets we support)
1183 case 'M': // 0x7fffffff
1184 return C_Immediate;
1185
1186 default:
1187 break;
1188 }
1189 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1190 switch (Constraint[1]) {
1191 case 'Q': // Address with base and unsigned 12-bit displacement
1192 case 'R': // Likewise, plus an index
1193 case 'S': // Address with base and signed 20-bit displacement
1194 case 'T': // Likewise, plus an index
1195 return C_Address;
1196
1197 default:
1198 break;
1199 }
1200 }
1201 return TargetLowering::getConstraintType(Constraint);
1202}
1203
1206 const char *constraint) const {
1208 Value *CallOperandVal = info.CallOperandVal;
1209 // If we don't have a value, we can't do a match,
1210 // but allow it at the lowest weight.
1211 if (!CallOperandVal)
1212 return CW_Default;
1213 Type *type = CallOperandVal->getType();
1214 // Look at the constraint type.
1215 switch (*constraint) {
1216 default:
1218 break;
1219
1220 case 'a': // Address register
1221 case 'd': // Data register (equivalent to 'r')
1222 case 'h': // High-part register
1223 case 'r': // General-purpose register
1224 weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1225 break;
1226
1227 case 'f': // Floating-point register
1228 if (!useSoftFloat())
1229 weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1230 break;
1231
1232 case 'v': // Vector register
1233 if (Subtarget.hasVector())
1234 weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1235 : CW_Default;
1236 break;
1237
1238 case 'I': // Unsigned 8-bit constant
1239 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1240 if (isUInt<8>(C->getZExtValue()))
1241 weight = CW_Constant;
1242 break;
1243
1244 case 'J': // Unsigned 12-bit constant
1245 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1246 if (isUInt<12>(C->getZExtValue()))
1247 weight = CW_Constant;
1248 break;
1249
1250 case 'K': // Signed 16-bit constant
1251 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1252 if (isInt<16>(C->getSExtValue()))
1253 weight = CW_Constant;
1254 break;
1255
1256 case 'L': // Signed 20-bit displacement (on all targets we support)
1257 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1258 if (isInt<20>(C->getSExtValue()))
1259 weight = CW_Constant;
1260 break;
1261
1262 case 'M': // 0x7fffffff
1263 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1264 if (C->getZExtValue() == 0x7fffffff)
1265 weight = CW_Constant;
1266 break;
1267 }
1268 return weight;
1269}
1270
1271// Parse a "{tNNN}" register constraint for which the register type "t"
1272// has already been verified. RC is the class associated with "t" and
1273// Map maps 0-based register numbers to LLVM register numbers.
1274static std::pair<unsigned, const TargetRegisterClass *>
1276 const unsigned *Map, unsigned Size) {
1277 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1278 if (isdigit(Constraint[2])) {
1279 unsigned Index;
1280 bool Failed =
1281 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1282 if (!Failed && Index < Size && Map[Index])
1283 return std::make_pair(Map[Index], RC);
1284 }
1285 return std::make_pair(0U, nullptr);
1286}
1287
1288std::pair<unsigned, const TargetRegisterClass *>
1290 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1291 if (Constraint.size() == 1) {
1292 // GCC Constraint Letters
1293 switch (Constraint[0]) {
1294 default: break;
1295 case 'd': // Data register (equivalent to 'r')
1296 case 'r': // General-purpose register
1297 if (VT.getSizeInBits() == 64)
1298 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1299 else if (VT.getSizeInBits() == 128)
1300 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1301 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1302
1303 case 'a': // Address register
1304 if (VT == MVT::i64)
1305 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1306 else if (VT == MVT::i128)
1307 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1308 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1309
1310 case 'h': // High-part register (an LLVM extension)
1311 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1312
1313 case 'f': // Floating-point register
1314 if (!useSoftFloat()) {
1315 if (VT.getSizeInBits() == 64)
1316 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1317 else if (VT.getSizeInBits() == 128)
1318 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1319 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1320 }
1321 break;
1322
1323 case 'v': // Vector register
1324 if (Subtarget.hasVector()) {
1325 if (VT.getSizeInBits() == 32)
1326 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1327 if (VT.getSizeInBits() == 64)
1328 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1329 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1330 }
1331 break;
1332 }
1333 }
1334 if (Constraint.starts_with("{")) {
1335
1336 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1337 // to check the size on.
1338 auto getVTSizeInBits = [&VT]() {
1339 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1340 };
1341
1342 // We need to override the default register parsing for GPRs and FPRs
1343 // because the interpretation depends on VT. The internal names of
1344 // the registers are also different from the external names
1345 // (F0D and F0S instead of F0, etc.).
1346 if (Constraint[1] == 'r') {
1347 if (getVTSizeInBits() == 32)
1348 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1350 if (getVTSizeInBits() == 128)
1351 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1353 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1355 }
1356 if (Constraint[1] == 'f') {
1357 if (useSoftFloat())
1358 return std::make_pair(
1359 0u, static_cast<const TargetRegisterClass *>(nullptr));
1360 if (getVTSizeInBits() == 32)
1361 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1363 if (getVTSizeInBits() == 128)
1364 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1366 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1368 }
1369 if (Constraint[1] == 'v') {
1370 if (!Subtarget.hasVector())
1371 return std::make_pair(
1372 0u, static_cast<const TargetRegisterClass *>(nullptr));
1373 if (getVTSizeInBits() == 32)
1374 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1376 if (getVTSizeInBits() == 64)
1377 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1379 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1381 }
1382 }
1383 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1384}
1385
1386// FIXME? Maybe this could be a TableGen attribute on some registers and
1387// this table could be generated automatically from RegInfo.
1390 const MachineFunction &MF) const {
1391 Register Reg =
1393 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0)
1394 .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0)
1395 .Default(0);
1396
1397 if (Reg)
1398 return Reg;
1399 report_fatal_error("Invalid register name global variable");
1400}
1401
1403 const Constant *PersonalityFn) const {
1404 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1405}
1406
1408 const Constant *PersonalityFn) const {
1409 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1410}
1411
1413 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1414 SelectionDAG &DAG) const {
1415 // Only support length 1 constraints for now.
1416 if (Constraint.size() == 1) {
1417 switch (Constraint[0]) {
1418 case 'I': // Unsigned 8-bit constant
1419 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1420 if (isUInt<8>(C->getZExtValue()))
1421 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1422 Op.getValueType()));
1423 return;
1424
1425 case 'J': // Unsigned 12-bit constant
1426 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1427 if (isUInt<12>(C->getZExtValue()))
1428 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1429 Op.getValueType()));
1430 return;
1431
1432 case 'K': // Signed 16-bit constant
1433 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1434 if (isInt<16>(C->getSExtValue()))
1435 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1436 Op.getValueType()));
1437 return;
1438
1439 case 'L': // Signed 20-bit displacement (on all targets we support)
1440 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1441 if (isInt<20>(C->getSExtValue()))
1442 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1443 Op.getValueType()));
1444 return;
1445
1446 case 'M': // 0x7fffffff
1447 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1448 if (C->getZExtValue() == 0x7fffffff)
1449 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1450 Op.getValueType()));
1451 return;
1452 }
1453 }
1454 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1455}
1456
1457//===----------------------------------------------------------------------===//
1458// Calling conventions
1459//===----------------------------------------------------------------------===//
1460
1461#include "SystemZGenCallingConv.inc"
1462
1464 CallingConv::ID) const {
1465 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1466 SystemZ::R14D, 0 };
1467 return ScratchRegs;
1468}
1469
1471 Type *ToType) const {
1472 return isTruncateFree(FromType, ToType);
1473}
1474
1476 return CI->isTailCall();
1477}
1478
1479// Value is a value that has been passed to us in the location described by VA
1480// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1481// any loads onto Chain.
1483 CCValAssign &VA, SDValue Chain,
1484 SDValue Value) {
1485 // If the argument has been promoted from a smaller type, insert an
1486 // assertion to capture this.
1487 if (VA.getLocInfo() == CCValAssign::SExt)
1489 DAG.getValueType(VA.getValVT()));
1490 else if (VA.getLocInfo() == CCValAssign::ZExt)
1492 DAG.getValueType(VA.getValVT()));
1493
1494 if (VA.isExtInLoc())
1495 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1496 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1497 // If this is a short vector argument loaded from the stack,
1498 // extend from i64 to full vector size and then bitcast.
1499 assert(VA.getLocVT() == MVT::i64);
1500 assert(VA.getValVT().isVector());
1501 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1502 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1503 } else
1504 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1505 return Value;
1506}
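// For example, a short (sub-128-bit) vector argument that was passed in a
// 64-bit stack slot arrives here as an i64: it becomes element 0 of a v2i64
// whose other element is undef and is then bitcast to the expected vector
// type, per the BCvt case above.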
1507
1508// Value is a value of type VA.getValVT() that we need to copy into
1509// the location described by VA. Return a copy of Value converted to
1510// VA.getValVT(). The caller is responsible for handling indirect values.
1512 CCValAssign &VA, SDValue Value) {
1513 switch (VA.getLocInfo()) {
1514 case CCValAssign::SExt:
1515 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1516 case CCValAssign::ZExt:
1517 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1518 case CCValAssign::AExt:
1519 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1520 case CCValAssign::BCvt: {
1521 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1522 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1523 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1524 // For an f32 vararg we need to first promote it to an f64 and then
1525 // bitcast it to an i64.
1526 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1527 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1528 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1529 ? MVT::v2i64
1530 : VA.getLocVT();
1531 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1532 // For ELF, this is a short vector argument to be stored to the stack,
1533 // bitcast to v2i64 and then extract first element.
1534 if (BitCastToType == MVT::v2i64)
1535 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1536 DAG.getConstant(0, DL, MVT::i32));
1537 return Value;
1538 }
1539 case CCValAssign::Full:
1540 return Value;
1541 default:
1542 llvm_unreachable("Unhandled getLocInfo()");
1543 }
1544}
1545
1547 SDLoc DL(In);
1548 SDValue Lo, Hi;
1549 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1550 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1551 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1552 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1553 DAG.getConstant(64, DL, MVT::i32)));
1554 } else {
1555 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1556 }
1557
1558 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1559 MVT::Untyped, Hi, Lo);
1560 return SDValue(Pair, 0);
1561}
1562
1564 SDLoc DL(In);
1565 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1566 DL, MVT::i64, In);
1567 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1568 DL, MVT::i64, In);
1569
1570 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1571 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1572 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1573 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1574 DAG.getConstant(64, DL, MVT::i32));
1575 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1576 } else {
1577 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1578 }
1579}
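// The two helpers above convert between an i128 value and the untyped
// even/odd GPR-pair (GR128) representation used by instructions and inline
// asm operands that need register pairs: PAIR128 glues the high and low
// 64-bit halves together, and the subreg_h64/subreg_l64 extractions pull
// them back apart.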
1580
1582 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1583 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1584 EVT ValueVT = Val.getValueType();
1585 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1586 // Inline assembly operand.
1587 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1588 return true;
1589 }
1590
1591 return false;
1592}
1593
1595 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1596 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1597 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1598 // Inline assembly operand.
1599 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1600 return DAG.getBitcast(ValueVT, Res);
1601 }
1602
1603 return SDValue();
1604}
1605
1607 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1608 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1609 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1611 MachineFrameInfo &MFI = MF.getFrameInfo();
1613 SystemZMachineFunctionInfo *FuncInfo =
1615 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1616 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1617
1618 // Assign locations to all of the incoming arguments.
1620 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1621 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1622 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
1623
1624 unsigned NumFixedGPRs = 0;
1625 unsigned NumFixedFPRs = 0;
1626 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1627 SDValue ArgValue;
1628 CCValAssign &VA = ArgLocs[I];
1629 EVT LocVT = VA.getLocVT();
1630 if (VA.isRegLoc()) {
1631 // Arguments passed in registers
1632 const TargetRegisterClass *RC;
1633 switch (LocVT.getSimpleVT().SimpleTy) {
1634 default:
1635 // Integers smaller than i64 should be promoted to i64.
1636 llvm_unreachable("Unexpected argument type");
1637 case MVT::i32:
1638 NumFixedGPRs += 1;
1639 RC = &SystemZ::GR32BitRegClass;
1640 break;
1641 case MVT::i64:
1642 NumFixedGPRs += 1;
1643 RC = &SystemZ::GR64BitRegClass;
1644 break;
1645 case MVT::f32:
1646 NumFixedFPRs += 1;
1647 RC = &SystemZ::FP32BitRegClass;
1648 break;
1649 case MVT::f64:
1650 NumFixedFPRs += 1;
1651 RC = &SystemZ::FP64BitRegClass;
1652 break;
1653 case MVT::f128:
1654 NumFixedFPRs += 2;
1655 RC = &SystemZ::FP128BitRegClass;
1656 break;
1657 case MVT::v16i8:
1658 case MVT::v8i16:
1659 case MVT::v4i32:
1660 case MVT::v2i64:
1661 case MVT::v4f32:
1662 case MVT::v2f64:
1663 RC = &SystemZ::VR128BitRegClass;
1664 break;
1665 }
1666
1667 Register VReg = MRI.createVirtualRegister(RC);
1668 MRI.addLiveIn(VA.getLocReg(), VReg);
1669 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1670 } else {
1671 assert(VA.isMemLoc() && "Argument not register or memory");
1672
1673 // Create the frame index object for this incoming parameter.
1674 // FIXME: Pre-include call frame size in the offset, should not
1675 // need to manually add it here.
1676 int64_t ArgSPOffset = VA.getLocMemOffset();
1677 if (Subtarget.isTargetXPLINK64()) {
1678 auto &XPRegs =
1680 ArgSPOffset += XPRegs.getCallFrameSize();
1681 }
1682 int FI =
1683 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1684
1685 // Create the SelectionDAG nodes corresponding to a load
1686 // from this parameter. Unpromoted ints and floats are
1687 // passed as right-justified 8-byte values.
1688 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1689 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1690 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1691 DAG.getIntPtrConstant(4, DL));
1692 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1694 }
1695
1696 // Convert the value of the argument register into the value that's
1697 // being passed.
1698 if (VA.getLocInfo() == CCValAssign::Indirect) {
1699 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1701 // If the original argument was split (e.g. i128), we need
1702 // to load all parts of it here (using the same address).
1703 unsigned ArgIndex = Ins[I].OrigArgIndex;
1704 assert (Ins[I].PartOffset == 0);
1705 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1706 CCValAssign &PartVA = ArgLocs[I + 1];
1707 unsigned PartOffset = Ins[I + 1].PartOffset;
1708 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1709 DAG.getIntPtrConstant(PartOffset, DL));
1710 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1712 ++I;
1713 }
1714 } else
1715 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1716 }
1717
1718 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
1719 // Save the number of non-varargs registers for later use by va_start, etc.
1720 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1721 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1722
1723 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1724 Subtarget.getSpecialRegisters());
1725
1726 // Likewise the address (in the form of a frame index) of where the
1727 // first stack vararg would be. The 1-byte size here is arbitrary.
1728 // FIXME: Pre-include call frame size in the offset, should not
1729 // need to manually add it here.
1730 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
1731 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
1732 FuncInfo->setVarArgsFrameIndex(FI);
1733 }
1734
1735 if (IsVarArg && Subtarget.isTargetELF()) {
1736 // Save the number of non-varargs registers for later use by va_start, etc.
1737 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1738 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1739
1740 // Likewise the address (in the form of a frame index) of where the
1741 // first stack vararg would be. The 1-byte size here is arbitrary.
1742 int64_t VarArgsOffset = CCInfo.getStackSize();
1743 FuncInfo->setVarArgsFrameIndex(
1744 MFI.CreateFixedObject(1, VarArgsOffset, true));
1745
1746 // ...and a similar frame index for the caller-allocated save area
1747 // that will be used to store the incoming registers.
1748 int64_t RegSaveOffset =
1749 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1750 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1751 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1752
1753 // Store the FPR varargs in the reserved frame slots. (We store the
1754 // GPRs as part of the prologue.)
1755 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1757 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1758 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1759 int FI =
1761 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1763 &SystemZ::FP64BitRegClass);
1764 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1765 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1767 }
1768 // Join the stores, which are independent of one another.
1769 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1770 ArrayRef(&MemOps[NumFixedFPRs],
1771 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
1772 }
1773 }
1774
1775 if (Subtarget.isTargetXPLINK64()) {
1776 // Create a virtual register for handling the incoming "ADA" special register (R5).
1777 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
1778 Register ADAvReg = MRI.createVirtualRegister(RC);
1779 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1780 Subtarget.getSpecialRegisters());
1781 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
1782 FuncInfo->setADAVirtualRegister(ADAvReg);
1783 }
1784 return Chain;
1785}
1786
1787static bool canUseSiblingCall(const CCState &ArgCCInfo,
1790 // Punt if there are any indirect or stack arguments, or if the call
1791 // needs the callee-saved argument register R6, or if the call uses
1792 // the callee-saved register arguments SwiftSelf and SwiftError.
1793 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1794 CCValAssign &VA = ArgLocs[I];
1796 return false;
1797 if (!VA.isRegLoc())
1798 return false;
1799 Register Reg = VA.getLocReg();
1800 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1801 return false;
1802 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1803 return false;
1804 }
1805 return true;
1806}
1807
1809 unsigned Offset, bool LoadAdr = false) {
1812 unsigned ADAvReg = MFI->getADAVirtualRegister();
1814
1815 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
1816 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
1817
1818 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
1819 if (!LoadAdr)
1820 Result = DAG.getLoad(
1821 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
1823
1824 return Result;
1825}
1826
1827// ADA access using Global value
1828 // Note: for functions, the address of the descriptor is returned.
1830 EVT PtrVT) {
1831 unsigned ADAtype;
1832 bool LoadAddr = false;
1833 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
1834 bool IsFunction =
1835 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
1836 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
1837
1838 if (IsFunction) {
1839 if (IsInternal) {
1841 LoadAddr = true;
1842 } else
1844 } else {
1846 }
1847 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
1848
1849 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
1850}
1851
1852static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
1853 SDLoc &DL, SDValue &Chain) {
1854 unsigned ADADelta = 0; // ADA offset in desc.
1855 unsigned EPADelta = 8; // EPA offset in desc.
1858
1859 // XPLink calling convention.
1860 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1861 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
1862 G->getGlobal()->hasPrivateLinkage());
1863 if (IsInternal) {
1866 unsigned ADAvReg = MFI->getADAVirtualRegister();
1867 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
1868 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1869 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1870 return true;
1871 } else {
1873 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1874 ADA = getADAEntry(DAG, GA, DL, ADADelta);
1875 Callee = getADAEntry(DAG, GA, DL, EPADelta);
1876 }
1877 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1879 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1880 ADA = getADAEntry(DAG, ES, DL, ADADelta);
1881 Callee = getADAEntry(DAG, ES, DL, EPADelta);
1882 } else {
1883 // Function pointer case
1884 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1885 DAG.getConstant(ADADelta, DL, PtrVT));
1886 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
1888 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1889 DAG.getConstant(EPADelta, DL, PtrVT));
1890 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
1892 }
1893 return false;
1894}
1895
1896SDValue
1898 SmallVectorImpl<SDValue> &InVals) const {
1899 SelectionDAG &DAG = CLI.DAG;
1900 SDLoc &DL = CLI.DL;
1902 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1904 SDValue Chain = CLI.Chain;
1905 SDValue Callee = CLI.Callee;
1906 bool &IsTailCall = CLI.IsTailCall;
1907 CallingConv::ID CallConv = CLI.CallConv;
1908 bool IsVarArg = CLI.IsVarArg;
1910 EVT PtrVT = getPointerTy(MF.getDataLayout());
1911 LLVMContext &Ctx = *DAG.getContext();
1913
1914 // FIXME: z/OS support to be added later.
1915 if (Subtarget.isTargetXPLINK64())
1916 IsTailCall = false;
1917
1918 // Analyze the operands of the call, assigning locations to each operand.
1920 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1921 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1922
1923 // We don't support GuaranteedTailCallOpt, only automatically-detected
1924 // sibling calls.
1925 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1926 IsTailCall = false;
1927
1928 // Get a count of how many bytes are to be pushed on the stack.
1929 unsigned NumBytes = ArgCCInfo.getStackSize();
1930
1931 // Mark the start of the call.
1932 if (!IsTailCall)
1933 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1934
1935 // Copy argument values to their designated locations.
1937 SmallVector<SDValue, 8> MemOpChains;
1938 SDValue StackPtr;
1939 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1940 CCValAssign &VA = ArgLocs[I];
1941 SDValue ArgValue = OutVals[I];
1942
1943 if (VA.getLocInfo() == CCValAssign::Indirect) {
1944 // Store the argument in a stack slot and pass its address.
1945 unsigned ArgIndex = Outs[I].OrigArgIndex;
1946 EVT SlotVT;
1947 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1948 // Allocate the full stack space for a promoted (and split) argument.
1949 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1950 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1951 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1952 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1953 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1954 } else {
1955 SlotVT = Outs[I].VT;
1956 }
1957 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1958 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1959 MemOpChains.push_back(
1960 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1962 // If the original argument was split (e.g. i128), we need
1963 // to store all parts of it here (and pass just one address).
1964 assert (Outs[I].PartOffset == 0);
1965 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1966 SDValue PartValue = OutVals[I + 1];
1967 unsigned PartOffset = Outs[I + 1].PartOffset;
1968 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1969 DAG.getIntPtrConstant(PartOffset, DL));
1970 MemOpChains.push_back(
1971 DAG.getStore(Chain, DL, PartValue, Address,
1973 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1974 SlotVT.getStoreSize()) && "Not enough space for argument part!");
1975 ++I;
1976 }
1977 ArgValue = SpillSlot;
1978 } else
1979 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1980
1981 if (VA.isRegLoc()) {
1982 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
1983 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
1984 // and low values.
1985 if (VA.getLocVT() == MVT::i128)
1986 ArgValue = lowerI128ToGR128(DAG, ArgValue);
1987 // Queue up the argument copies and emit them at the end.
1988 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1989 } else {
1990 assert(VA.isMemLoc() && "Argument not register or memory");
1991
1992 // Work out the address of the stack slot. Unpromoted ints and
1993 // floats are passed as right-justified 8-byte values.
1994 if (!StackPtr.getNode())
1995 StackPtr = DAG.getCopyFromReg(Chain, DL,
1996 Regs->getStackPointerRegister(), PtrVT);
1997 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
1998 VA.getLocMemOffset();
1999 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
2000 Offset += 4;
2001 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2003
2004 // Emit the store.
2005 MemOpChains.push_back(
2006 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2007
2008 // Although long doubles or vectors are passed through the stack when
2009 // they are vararg (non-fixed arguments), if a long double or vector
2010 // occupies the third and fourth slots of the argument list, GPR3 should
2011 // still shadow the third slot of the argument list.
2012 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2013 SDValue ShadowArgValue =
2014 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2015 DAG.getIntPtrConstant(1, DL));
2016 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2017 }
2018 }
2019 }
2020
2021 // Join the stores, which are independent of one another.
2022 if (!MemOpChains.empty())
2023 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2024
2025 // Accept direct calls by converting symbolic call addresses to the
2026 // associated Target* opcodes. Force %r1 to be used for indirect
2027 // tail calls.
2028 SDValue Glue;
2029
2030 if (Subtarget.isTargetXPLINK64()) {
2031 SDValue ADA;
2032 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2033 if (!IsBRASL) {
2034 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2035 ->getAddressOfCalleeRegister();
2036 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2037 Glue = Chain.getValue(1);
2038 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2039 }
2040 RegsToPass.push_back(std::make_pair(
2041 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2042 } else {
2043 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2044 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2045 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2046 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2047 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2048 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2049 } else if (IsTailCall) {
2050 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2051 Glue = Chain.getValue(1);
2052 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2053 }
2054 }
2055
2056 // Build a sequence of copy-to-reg nodes, chained and glued together.
2057 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
2058 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
2059 RegsToPass[I].second, Glue);
2060 Glue = Chain.getValue(1);
2061 }
2062
2063 // The first call operand is the chain and the second is the target address.
2064 SmallVector<SDValue, 8> Ops;
2065 Ops.push_back(Chain);
2066 Ops.push_back(Callee);
2067
2068 // Add argument registers to the end of the list so that they are
2069 // known live into the call.
2070 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
2071 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
2072 RegsToPass[I].second.getValueType()));
2073
2074 // Add a register mask operand representing the call-preserved registers.
2075 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2076 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2077 assert(Mask && "Missing call preserved mask for calling convention");
2078 Ops.push_back(DAG.getRegisterMask(Mask));
2079
2080 // Glue the call to the argument copies, if any.
2081 if (Glue.getNode())
2082 Ops.push_back(Glue);
2083
2084 // Emit the call.
2085 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2086 if (IsTailCall) {
2087 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2088 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2089 return Ret;
2090 }
2091 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2092 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2093 Glue = Chain.getValue(1);
2094
2095 // Mark the end of the call, which is glued to the call itself.
2096 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2097 Glue = Chain.getValue(1);
2098
2099 // Assign locations to each value returned by this call.
2100 SmallVector<CCValAssign, 16> RetLocs;
2101 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2102 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2103
2104 // Copy all of the result registers out of their specified physreg.
2105 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2106 CCValAssign &VA = RetLocs[I];
2107
2108 // Copy the value out, gluing the copy to the end of the call sequence.
2109 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2110 VA.getLocVT(), Glue);
2111 Chain = RetValue.getValue(1);
2112 Glue = RetValue.getValue(2);
2113
2114 // Convert the value of the return register into the value that's
2115 // being returned.
2116 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2117 }
2118
2119 return Chain;
2120}
2121
2122// Generate a call taking the given operands as arguments and returning a
2123// result of type RetVT.
2124 std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
2125 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2126 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2127 bool DoesNotReturn, bool IsReturnValueUsed) const {
2128 TargetLowering::ArgListTy Args;
2129 Args.reserve(Ops.size());
2130
2131 TargetLowering::ArgListEntry Entry;
2132 for (SDValue Op : Ops) {
2133 Entry.Node = Op;
2134 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
2135 Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2136 Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2137 Args.push_back(Entry);
2138 }
2139
2140 SDValue Callee =
2141 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2142
2143 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2144 TargetLowering::CallLoweringInfo CLI(DAG);
2145 bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
2146 CLI.setDebugLoc(DL)
2147 .setChain(Chain)
2148 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2149 .setNoReturn(DoesNotReturn)
2150 .setDiscardResult(!IsReturnValueUsed)
2151 .setSExtResult(SignExtend)
2152 .setZExtResult(!SignExtend);
2153 return LowerCallTo(CLI);
2154}
2155
2156 bool SystemZTargetLowering::
2157 CanLowerReturn(CallingConv::ID CallConv,
2158 MachineFunction &MF, bool isVarArg,
2159 const SmallVectorImpl<ISD::OutputArg> &Outs,
2160 LLVMContext &Context) const {
2161 // Special case that we cannot easily detect in RetCC_SystemZ since
2162 // i128 may not be a legal type.
2163 for (auto &Out : Outs)
2164 if (Out.ArgVT == MVT::i128)
2165 return false;
2166
2167 SmallVector<CCValAssign, 16> RetLocs;
2168 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
2169 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2170}
2171
2172SDValue
2173 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2174 bool IsVarArg,
2175 const SmallVectorImpl<ISD::OutputArg> &Outs,
2176 const SmallVectorImpl<SDValue> &OutVals,
2177 const SDLoc &DL, SelectionDAG &DAG) const {
2178 MachineFunction &MF = DAG.getMachineFunction();
2179
2180 // Assign locations to each returned value.
2181 SmallVector<CCValAssign, 16> RetLocs;
2182 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2183 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2184
2185 // Quick exit for void returns
2186 if (RetLocs.empty())
2187 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2188
2189 if (CallConv == CallingConv::GHC)
2190 report_fatal_error("GHC functions return void only");
2191
2192 // Copy the result values into the output registers.
2193 SDValue Glue;
2194 SmallVector<SDValue, 4> RetOps;
2195 RetOps.push_back(Chain);
2196 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2197 CCValAssign &VA = RetLocs[I];
2198 SDValue RetValue = OutVals[I];
2199
2200 // Make the return register live on exit.
2201 assert(VA.isRegLoc() && "Can only return in registers!");
2202
2203 // Promote the value as required.
2204 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2205
2206 // Chain and glue the copies together.
2207 Register Reg = VA.getLocReg();
2208 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2209 Glue = Chain.getValue(1);
2210 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2211 }
2212
2213 // Update chain and glue.
2214 RetOps[0] = Chain;
2215 if (Glue.getNode())
2216 RetOps.push_back(Glue);
2217
2218 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2219}
2220
2221// Return true if Op is an intrinsic node with chain that returns the CC value
2222// as its only (other) argument. Provide the associated SystemZISD opcode and
2223// the mask of valid CC values if so.
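// Such intrinsics produce both a CC result and a chain, so the intrinsic ID
// is found in operand 1 (operand 0 being the chain); the chainless variant
// further below reads the ID from operand 0 instead.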
2224static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2225 unsigned &CCValid) {
2226 unsigned Id = Op.getConstantOperandVal(1);
2227 switch (Id) {
2228 case Intrinsic::s390_tbegin:
2229 Opcode = SystemZISD::TBEGIN;
2230 CCValid = SystemZ::CCMASK_TBEGIN;
2231 return true;
2232
2233 case Intrinsic::s390_tbegin_nofloat:
2234 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2235 CCValid = SystemZ::CCMASK_TBEGIN;
2236 return true;
2237
2238 case Intrinsic::s390_tend:
2239 Opcode = SystemZISD::TEND;
2240 CCValid = SystemZ::CCMASK_TEND;
2241 return true;
2242
2243 default:
2244 return false;
2245 }
2246}
2247
2248// Return true if Op is an intrinsic node without chain that returns the
2249// CC value as its final argument. Provide the associated SystemZISD
2250// opcode and the mask of valid CC values if so.
2251static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2252 unsigned Id = Op.getConstantOperandVal(0);
2253 switch (Id) {
2254 case Intrinsic::s390_vpkshs:
2255 case Intrinsic::s390_vpksfs:
2256 case Intrinsic::s390_vpksgs:
2257 Opcode = SystemZISD::PACKS_CC;
2258 CCValid = SystemZ::CCMASK_VCMP;
2259 return true;
2260
2261 case Intrinsic::s390_vpklshs:
2262 case Intrinsic::s390_vpklsfs:
2263 case Intrinsic::s390_vpklsgs:
2264 Opcode = SystemZISD::PACKLS_CC;
2265 CCValid = SystemZ::CCMASK_VCMP;
2266 return true;
2267
2268 case Intrinsic::s390_vceqbs:
2269 case Intrinsic::s390_vceqhs:
2270 case Intrinsic::s390_vceqfs:
2271 case Intrinsic::s390_vceqgs:
2272 Opcode = SystemZISD::VICMPES;
2273 CCValid = SystemZ::CCMASK_VCMP;
2274 return true;
2275
2276 case Intrinsic::s390_vchbs:
2277 case Intrinsic::s390_vchhs:
2278 case Intrinsic::s390_vchfs:
2279 case Intrinsic::s390_vchgs:
2280 Opcode = SystemZISD::VICMPHS;
2281 CCValid = SystemZ::CCMASK_VCMP;
2282 return true;
2283
2284 case Intrinsic::s390_vchlbs:
2285 case Intrinsic::s390_vchlhs:
2286 case Intrinsic::s390_vchlfs:
2287 case Intrinsic::s390_vchlgs:
2288 Opcode = SystemZISD::VICMPHLS;
2289 CCValid = SystemZ::CCMASK_VCMP;
2290 return true;
2291
2292 case Intrinsic::s390_vtm:
2293 Opcode = SystemZISD::VTM;
2294 CCValid = SystemZ::CCMASK_VCMP;
2295 return true;
2296
2297 case Intrinsic::s390_vfaebs:
2298 case Intrinsic::s390_vfaehs:
2299 case Intrinsic::s390_vfaefs:
2300 Opcode = SystemZISD::VFAE_CC;
2301 CCValid = SystemZ::CCMASK_ANY;
2302 return true;
2303
2304 case Intrinsic::s390_vfaezbs:
2305 case Intrinsic::s390_vfaezhs:
2306 case Intrinsic::s390_vfaezfs:
2307 Opcode = SystemZISD::VFAEZ_CC;
2308 CCValid = SystemZ::CCMASK_ANY;
2309 return true;
2310
2311 case Intrinsic::s390_vfeebs:
2312 case Intrinsic::s390_vfeehs:
2313 case Intrinsic::s390_vfeefs:
2314 Opcode = SystemZISD::VFEE_CC;
2315 CCValid = SystemZ::CCMASK_ANY;
2316 return true;
2317
2318 case Intrinsic::s390_vfeezbs:
2319 case Intrinsic::s390_vfeezhs:
2320 case Intrinsic::s390_vfeezfs:
2321 Opcode = SystemZISD::VFEEZ_CC;
2322 CCValid = SystemZ::CCMASK_ANY;
2323 return true;
2324
2325 case Intrinsic::s390_vfenebs:
2326 case Intrinsic::s390_vfenehs:
2327 case Intrinsic::s390_vfenefs:
2328 Opcode = SystemZISD::VFENE_CC;
2329 CCValid = SystemZ::CCMASK_ANY;
2330 return true;
2331
2332 case Intrinsic::s390_vfenezbs:
2333 case Intrinsic::s390_vfenezhs:
2334 case Intrinsic::s390_vfenezfs:
2335 Opcode = SystemZISD::VFENEZ_CC;
2336 CCValid = SystemZ::CCMASK_ANY;
2337 return true;
2338
2339 case Intrinsic::s390_vistrbs:
2340 case Intrinsic::s390_vistrhs:
2341 case Intrinsic::s390_vistrfs:
2342 Opcode = SystemZISD::VISTR_CC;
2343 CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
2344 return true;
2345
2346 case Intrinsic::s390_vstrcbs:
2347 case Intrinsic::s390_vstrchs:
2348 case Intrinsic::s390_vstrcfs:
2349 Opcode = SystemZISD::VSTRC_CC;
2350 CCValid = SystemZ::CCMASK_ANY;
2351 return true;
2352
2353 case Intrinsic::s390_vstrczbs:
2354 case Intrinsic::s390_vstrczhs:
2355 case Intrinsic::s390_vstrczfs:
2356 Opcode = SystemZISD::VSTRCZ_CC;
2357 CCValid = SystemZ::CCMASK_ANY;
2358 return true;
2359
2360 case Intrinsic::s390_vstrsb:
2361 case Intrinsic::s390_vstrsh:
2362 case Intrinsic::s390_vstrsf:
2363 Opcode = SystemZISD::VSTRS_CC;
2364 CCValid = SystemZ::CCMASK_ANY;
2365 return true;
2366
2367 case Intrinsic::s390_vstrszb:
2368 case Intrinsic::s390_vstrszh:
2369 case Intrinsic::s390_vstrszf:
2370 Opcode = SystemZISD::VSTRSZ_CC;
2371 CCValid = SystemZ::CCMASK_ANY;
2372 return true;
2373
2374 case Intrinsic::s390_vfcedbs:
2375 case Intrinsic::s390_vfcesbs:
2376 Opcode = SystemZISD::VFCMPES;
2377 CCValid = SystemZ::CCMASK_VCMP;
2378 return true;
2379
2380 case Intrinsic::s390_vfchdbs:
2381 case Intrinsic::s390_vfchsbs:
2382 Opcode = SystemZISD::VFCMPHS;
2383 CCValid = SystemZ::CCMASK_VCMP;
2384 return true;
2385
2386 case Intrinsic::s390_vfchedbs:
2387 case Intrinsic::s390_vfchesbs:
2388 Opcode = SystemZISD::VFCMPHES;
2389 CCValid = SystemZ::CCMASK_VCMP;
2390 return true;
2391
2392 case Intrinsic::s390_vftcidb:
2393 case Intrinsic::s390_vftcisb:
2394 Opcode = SystemZISD::VFTCI;
2395 CCValid = SystemZ::CCMASK_VCMP;
2396 return true;
2397
2398 case Intrinsic::s390_tdc:
2399 Opcode = SystemZISD::TDC;
2400 CCValid = SystemZ::CCMASK_TDC;
2401 return true;
2402
2403 default:
2404 return false;
2405 }
2406}
2407
2408// Emit an intrinsic with chain and an explicit CC register result.
2409 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2410 unsigned Opcode) {
2411 // Copy all operands except the intrinsic ID.
2412 unsigned NumOps = Op.getNumOperands();
2413 SmallVector<SDValue, 6> Ops;
2414 Ops.reserve(NumOps - 1);
2415 Ops.push_back(Op.getOperand(0));
2416 for (unsigned I = 2; I < NumOps; ++I)
2417 Ops.push_back(Op.getOperand(I));
2418
2419 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2420 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2421 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2422 SDValue OldChain = SDValue(Op.getNode(), 1);
2423 SDValue NewChain = SDValue(Intr.getNode(), 1);
2424 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2425 return Intr.getNode();
2426}
2427
2428// Emit an intrinsic with an explicit CC register result.
2429 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2430 unsigned Opcode) {
2431 // Copy all operands except the intrinsic ID.
2432 unsigned NumOps = Op.getNumOperands();
2433 SmallVector<SDValue, 6> Ops;
2434 Ops.reserve(NumOps - 1);
2435 for (unsigned I = 1; I < NumOps; ++I)
2436 Ops.push_back(Op.getOperand(I));
2437
2438 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2439 return Intr.getNode();
2440}
2441
2442// CC is a comparison that will be implemented using an integer or
2443// floating-point comparison. Return the condition code mask for
2444// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2445// unsigned comparisons and clear for signed ones. In the floating-point
2446// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2447 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2448#define CONV(X) \
2449 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2450 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2451 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2452
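// For example, CONV(GT) makes ISD::SETGT and ISD::SETOGT both map to
// SystemZ::CCMASK_CMP_GT, while ISD::SETUGT additionally sets
// SystemZ::CCMASK_CMP_UO.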
2453 switch (CC) {
2454 default:
2455 llvm_unreachable("Invalid integer condition!");
2456
2457 CONV(EQ);
2458 CONV(NE);
2459 CONV(GT);
2460 CONV(GE);
2461 CONV(LT);
2462 CONV(LE);
2463
2464 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2465 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2466 }
2467#undef CONV
2468}
2469
2470// If C can be converted to a comparison against zero, adjust the operands
2471// as necessary.
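// For example, "x > -1" becomes "x >= 0" and "x < 1" becomes "x <= 0":
// toggling CCMASK_CMP_EQ in the mask compensates for replacing the constant
// with zero.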
2472static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2473 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2474 return;
2475
2476 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2477 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2478 return;
2479
2480 int64_t Value = ConstOp1->getSExtValue();
2481 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2482 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2483 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2484 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2485 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2486 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2487 }
2488}
2489
2490// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2491// adjust the operands as necessary.
2492static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2493 Comparison &C) {
2494 // For us to make any changes, it must be a comparison between a single-use
2495 // load and a constant.
2496 if (!C.Op0.hasOneUse() ||
2497 C.Op0.getOpcode() != ISD::LOAD ||
2498 C.Op1.getOpcode() != ISD::Constant)
2499 return;
2500
2501 // We must have an 8- or 16-bit load.
2502 auto *Load = cast<LoadSDNode>(C.Op0);
2503 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2504 if ((NumBits != 8 && NumBits != 16) ||
2505 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2506 return;
2507
2508 // The load must be an extending one and the constant must be within the
2509 // range of the unextended value.
2510 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2511 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2512 return;
2513 uint64_t Value = ConstOp1->getZExtValue();
2514 uint64_t Mask = (1 << NumBits) - 1;
2515 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2516 // Make sure that ConstOp1 is in range of C.Op0.
2517 int64_t SignedValue = ConstOp1->getSExtValue();
2518 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2519 return;
2520 if (C.ICmpType != SystemZICMP::SignedOnly) {
2521 // Unsigned comparison between two sign-extended values is equivalent
2522 // to unsigned comparison between two zero-extended values.
2523 Value &= Mask;
2524 } else if (NumBits == 8) {
2525 // Try to treat the comparison as unsigned, so that we can use CLI.
2526 // Adjust CCMask and Value as necessary.
2527 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2528 // Test whether the high bit of the byte is set.
2529 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2530 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2531 // Test whether the high bit of the byte is clear.
2532 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2533 else
2534 // No instruction exists for this combination.
2535 return;
2536 C.ICmpType = SystemZICMP::UnsignedOnly;
2537 }
2538 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2539 if (Value > Mask)
2540 return;
2541 // If the constant is in range, we can use any comparison.
2542 C.ICmpType = SystemZICMP::Any;
2543 } else
2544 return;
2545
2546 // Make sure that the first operand is an i32 of the right extension type.
2547 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2548 ISD::SEXTLOAD :
2549 ISD::ZEXTLOAD);
2550 if (C.Op0.getValueType() != MVT::i32 ||
2551 Load->getExtensionType() != ExtType) {
2552 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2553 Load->getBasePtr(), Load->getPointerInfo(),
2554 Load->getMemoryVT(), Load->getAlign(),
2555 Load->getMemOperand()->getFlags());
2556 // Update the chain uses.
2557 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2558 }
2559
2560 // Make sure that the second operand is an i32 with the right value.
2561 if (C.Op1.getValueType() != MVT::i32 ||
2562 Value != ConstOp1->getZExtValue())
2563 C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2564}
2565
2566// Return true if Op is either an unextended load, or a load suitable
2567// for integer register-memory comparisons of type ICmpType.
2568static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2569 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2570 if (Load) {
2571 // There are no instructions to compare a register with a memory byte.
2572 if (Load->getMemoryVT() == MVT::i8)
2573 return false;
2574 // Otherwise decide on extension type.
2575 switch (Load->getExtensionType()) {
2576 case ISD::NON_EXTLOAD:
2577 return true;
2578 case ISD::SEXTLOAD:
2579 return ICmpType != SystemZICMP::UnsignedOnly;
2580 case ISD::ZEXTLOAD:
2581 return ICmpType != SystemZICMP::SignedOnly;
2582 default:
2583 break;
2584 }
2585 }
2586 return false;
2587}
2588
2589// Return true if it is better to swap the operands of C.
2590static bool shouldSwapCmpOperands(const Comparison &C) {
2591 // Leave i128 and f128 comparisons alone, since they have no memory forms.
2592 if (C.Op0.getValueType() == MVT::i128)
2593 return false;
2594 if (C.Op0.getValueType() == MVT::f128)
2595 return false;
2596
2597 // Always keep a floating-point constant second, since comparisons with
2598 // zero can use LOAD TEST and comparisons with other constants make a
2599 // natural memory operand.
2600 if (isa<ConstantFPSDNode>(C.Op1))
2601 return false;
2602
2603 // Never swap comparisons with zero since there are many ways to optimize
2604 // those later.
2605 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2606 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2607 return false;
2608
2609 // Also keep natural memory operands second if the loaded value is
2610 // only used here. Several comparisons have memory forms.
2611 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2612 return false;
2613
2614 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2615 // In that case we generally prefer the memory to be second.
2616 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2617 // The only exceptions are when the second operand is a constant and
2618 // we can use things like CHHSI.
2619 if (!ConstOp1)
2620 return true;
2621 // The unsigned memory-immediate instructions can handle 16-bit
2622 // unsigned integers.
2623 if (C.ICmpType != SystemZICMP::SignedOnly &&
2624 isUInt<16>(ConstOp1->getZExtValue()))
2625 return false;
2626 // The signed memory-immediate instructions can handle 16-bit
2627 // signed integers.
2628 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2629 isInt<16>(ConstOp1->getSExtValue()))
2630 return false;
2631 return true;
2632 }
2633
2634 // Try to promote the use of CGFR and CLGFR.
2635 unsigned Opcode0 = C.Op0.getOpcode();
2636 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2637 return true;
2638 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2639 return true;
2640 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
2641 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2642 C.Op0.getConstantOperandVal(1) == 0xffffffff)
2643 return true;
2644
2645 return false;
2646}
2647
2648// Check whether C tests for equality between X and Y and whether X - Y
2649// or Y - X is also computed. In that case it's better to compare the
2650// result of the subtraction against zero.
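// For example, if the function also computes "x - y", an "x == y" test can
// be rewritten as "(x - y) == 0" so that the subtraction itself sets CC.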
2651 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2652 Comparison &C) {
2653 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2654 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2655 for (SDNode *N : C.Op0->uses()) {
2656 if (N->getOpcode() == ISD::SUB &&
2657 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2658 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2659 // Disable the nsw and nuw flags: the backend needs to handle
2660 // overflow as well during comparison elimination.
2661 SDNodeFlags Flags = N->getFlags();
2662 Flags.setNoSignedWrap(false);
2663 Flags.setNoUnsignedWrap(false);
2664 N->setFlags(Flags);
2665 C.Op0 = SDValue(N, 0);
2666 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2667 return;
2668 }
2669 }
2670 }
2671}
2672
2673// Check whether C compares a floating-point value with zero and if that
2674// floating-point value is also negated. In this case we can use the
2675// negation to set CC, so avoiding separate LOAD AND TEST and
2676// LOAD (NEGATIVE/COMPLEMENT) instructions.
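// For example, if "-x" is already computed, "x < 0.0" can be tested on the
// negated value with the CC mask reversed, letting the negation set CC.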
2677static void adjustForFNeg(Comparison &C) {
2678 // This optimization is invalid for strict comparisons, since FNEG
2679 // does not raise any exceptions.
2680 if (C.Chain)
2681 return;
2682 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2683 if (C1 && C1->isZero()) {
2684 for (SDNode *N : C.Op0->uses()) {
2685 if (N->getOpcode() == ISD::FNEG) {
2686 C.Op0 = SDValue(N, 0);
2687 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2688 return;
2689 }
2690 }
2691 }
2692}
2693
2694// Check whether C compares (shl X, 32) with 0 and whether X is
2695// also sign-extended. In that case it is better to test the result
2696// of the sign extension using LTGFR.
2697//
2698// This case is important because InstCombine transforms a comparison
2699// with (sext (trunc X)) into a comparison with (shl X, 32).
2700static void adjustForLTGFR(Comparison &C) {
2701 // Check for a comparison between (shl X, 32) and 0.
2702 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
2703 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
2704 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2705 if (C1 && C1->getZExtValue() == 32) {
2706 SDValue ShlOp0 = C.Op0.getOperand(0);
2707 // See whether X has any SIGN_EXTEND_INREG uses.
2708 for (SDNode *N : ShlOp0->uses()) {
2709 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2710 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2711 C.Op0 = SDValue(N, 0);
2712 return;
2713 }
2714 }
2715 }
2716 }
2717}
2718
2719// If C compares the truncation of an extending load, try to compare
2720// the untruncated value instead. This exposes more opportunities to
2721// reuse CC.
2722static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2723 Comparison &C) {
2724 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2725 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2726 C.Op1.getOpcode() == ISD::Constant &&
2727 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
2728 C.Op1->getAsZExtVal() == 0) {
2729 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2730 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
2731 C.Op0.getValueSizeInBits().getFixedValue()) {
2732 unsigned Type = L->getExtensionType();
2733 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2734 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2735 C.Op0 = C.Op0.getOperand(0);
2736 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2737 }
2738 }
2739 }
2740}
2741
2742// Return true if shift operation N has an in-range constant shift value.
2743// Store it in ShiftVal if so.
2744static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2745 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2746 if (!Shift)
2747 return false;
2748
2749 uint64_t Amount = Shift->getZExtValue();
2750 if (Amount >= N.getValueSizeInBits())
2751 return false;
2752
2753 ShiftVal = Amount;
2754 return true;
2755}
2756
2757// Check whether an AND with Mask is suitable for a TEST UNDER MASK
2758// instruction and whether the CC value is descriptive enough to handle
2759// a comparison of type Opcode between the AND result and CmpVal.
2760// CCMask says which comparison result is being tested and BitSize is
2761// the number of bits in the operands. If TEST UNDER MASK can be used,
2762// return the corresponding CC mask, otherwise return 0.
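// For example, "(x & Mask) == 0" corresponds to the "all selected bits zero"
// CC mask and "(x & Mask) == Mask" to "all selected bits one"; the checks
// below also map ordered comparisons onto TM CC values where possible.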
2763static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2764 uint64_t Mask, uint64_t CmpVal,
2765 unsigned ICmpType) {
2766 assert(Mask != 0 && "ANDs with zero should have been removed by now");
2767
2768 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2769 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2770 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2771 return 0;
2772
2773 // Work out the masks for the lowest and highest bits.
2774 uint64_t High = uint64_t(1) << (63 - llvm::countl_zero(Mask));
2775 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
2776
2777 // Signed ordered comparisons are effectively unsigned if the sign
2778 // bit is dropped.
2779 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2780
2781 // Check for equality comparisons with 0, or the equivalent.
2782 if (CmpVal == 0) {
2783 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2784 return SystemZ::CCMASK_TM_ALL_0;
2785 if (CCMask == SystemZ::CCMASK_CMP_NE)
2786 return SystemZ::CCMASK_TM_SOME_1;
2787 }
2788 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2789 if (CCMask == SystemZ::CCMASK_CMP_LT)
2790 return SystemZ::CCMASK_TM_ALL_0;
2791 if (CCMask == SystemZ::CCMASK_CMP_GE)
2792 return SystemZ::CCMASK_TM_SOME_1;
2793 }
2794 if (EffectivelyUnsigned && CmpVal < Low) {
2795 if (CCMask == SystemZ::CCMASK_CMP_LE)
2796 return SystemZ::CCMASK_TM_ALL_0;
2797 if (CCMask == SystemZ::CCMASK_CMP_GT)
2798 return SystemZ::CCMASK_TM_SOME_1;
2799 }
2800
2801 // Check for equality comparisons with the mask, or the equivalent.
2802 if (CmpVal == Mask) {
2803 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2804 return SystemZ::CCMASK_TM_ALL_1;
2805 if (CCMask == SystemZ::CCMASK_CMP_NE)
2806 return SystemZ::CCMASK_TM_SOME_0;
2807 }
2808 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2809 if (CCMask == SystemZ::CCMASK_CMP_GT)
2810 return SystemZ::CCMASK_TM_ALL_1;
2811 if (CCMask == SystemZ::CCMASK_CMP_LE)
2812 return SystemZ::CCMASK_TM_SOME_0;
2813 }
2814 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2815 if (CCMask == SystemZ::CCMASK_CMP_GE)
2816 return SystemZ::CCMASK_TM_ALL_1;
2817 if (CCMask == SystemZ::CCMASK_CMP_LT)
2818 return SystemZ::CCMASK_TM_SOME_0;
2819 }
2820
2821 // Check for ordered comparisons with the top bit.
2822 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2823 if (CCMask == SystemZ::CCMASK_CMP_LE)
2824 return SystemZ::CCMASK_TM_MSB_0;
2825 if (CCMask == SystemZ::CCMASK_CMP_GT)
2826 return SystemZ::CCMASK_TM_MSB_1;
2827 }
2828 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2829 if (CCMask == SystemZ::CCMASK_CMP_LT)
2830 return SystemZ::CCMASK_TM_MSB_0;
2831 if (CCMask == SystemZ::CCMASK_CMP_GE)
2832 return SystemZ::CCMASK_TM_MSB_1;
2833 }
2834
2835 // If there are just two bits, we can do equality checks for Low and High
2836 // as well.
2837 if (Mask == Low + High) {
2838 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2839 return SystemZ::CCMASK_TM_MIXED_MSB_0;
2840 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2841 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2842 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2843 return SystemZ::CCMASK_TM_MIXED_MSB_1;
2844 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2845 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2846 }
2847
2848 // Looks like we've exhausted our options.
2849 return 0;
2850}
2851
2852// See whether C can be implemented as a TEST UNDER MASK instruction.
2853// Update the arguments with the TM version if so.
2854 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2855 Comparison &C) {
2856 // Use VECTOR TEST UNDER MASK for i128 operations.
2857 if (C.Op0.getValueType() == MVT::i128) {
2858 // We can use VTM for EQ/NE comparisons of x & y against 0.
2859 if (C.Op0.getOpcode() == ISD::AND &&
2860 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2861 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
2862 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
2863 if (Mask && Mask->getAPIntValue() == 0) {
2864 C.Opcode = SystemZISD::VTM;
2865 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
2866 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
2867 C.CCValid = SystemZ::CCMASK_VCMP;
2868 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2869 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2870 else
2871 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2872 }
2873 }
2874 return;
2875 }
2876
2877 // Check that we have a comparison with a constant.
2878 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2879 if (!ConstOp1)
2880 return;
2881 uint64_t CmpVal = ConstOp1->getZExtValue();
2882
2883 // Check whether the nonconstant input is an AND with a constant mask.
2884 Comparison NewC(C);
2885 uint64_t MaskVal;
2886 ConstantSDNode *Mask = nullptr;
2887 if (C.Op0.getOpcode() == ISD::AND) {
2888 NewC.Op0 = C.Op0.getOperand(0);
2889 NewC.Op1 = C.Op0.getOperand(1);
2890 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2891 if (!Mask)
2892 return;
2893 MaskVal = Mask->getZExtValue();
2894 } else {
2895 // There is no instruction to compare with a 64-bit immediate
2896 // so use TMHH instead if possible. We need an unsigned ordered
2897 // comparison with an i64 immediate.
2898 if (NewC.Op0.getValueType() != MVT::i64 ||
2899 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2900 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2901 NewC.ICmpType == SystemZICMP::SignedOnly)
2902 return;
2903 // Convert LE and GT comparisons into LT and GE.
2904 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2905 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2906 if (CmpVal == uint64_t(-1))
2907 return;
2908 CmpVal += 1;
2909 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2910 }
2911 // If the low N bits of Op1 are zero, then the low N bits of Op0 can
2912 // be masked off without changing the result.
2913 MaskVal = -(CmpVal & -CmpVal);
2914 NewC.ICmpType = SystemZICMP::UnsignedOnly;
2915 }
2916 if (!MaskVal)
2917 return;
2918
2919 // Check whether the combination of mask, comparison value and comparison
2920 // type are suitable.
2921 unsigned BitSize = NewC.Op0.getValueSizeInBits();
2922 unsigned NewCCMask, ShiftVal;
2923 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2924 NewC.Op0.getOpcode() == ISD::SHL &&
2925 isSimpleShift(NewC.Op0, ShiftVal) &&
2926 (MaskVal >> ShiftVal != 0) &&
2927 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2928 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2929 MaskVal >> ShiftVal,
2930 CmpVal >> ShiftVal,
2931 SystemZICMP::Any))) {
2932 NewC.Op0 = NewC.Op0.getOperand(0);
2933 MaskVal >>= ShiftVal;
2934 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2935 NewC.Op0.getOpcode() == ISD::SRL &&
2936 isSimpleShift(NewC.Op0, ShiftVal) &&
2937 (MaskVal << ShiftVal != 0) &&
2938 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2939 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2940 MaskVal << ShiftVal,
2941 CmpVal << ShiftVal,
2942 SystemZICMP::UnsignedOnly))) {
2943 NewC.Op0 = NewC.Op0.getOperand(0);
2944 MaskVal <<= ShiftVal;
2945 } else {
2946 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2947 NewC.ICmpType);
2948 if (!NewCCMask)
2949 return;
2950 }
2951
2952 // Go ahead and make the change.
2953 C.Opcode = SystemZISD::TM;
2954 C.Op0 = NewC.Op0;
2955 if (Mask && Mask->getZExtValue() == MaskVal)
2956 C.Op1 = SDValue(Mask, 0);
2957 else
2958 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2959 C.CCValid = SystemZ::CCMASK_TM;
2960 C.CCMask = NewCCMask;
2961}
2962
2963// Implement i128 comparison in vector registers.
2964static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
2965 Comparison &C) {
2966 if (C.Opcode != SystemZISD::ICMP)
2967 return;
2968 if (C.Op0.getValueType() != MVT::i128)
2969 return;
2970
2971 // (In-)Equality comparisons can be implemented via VCEQGS.
2972 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2973 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2974 C.Opcode = SystemZISD::VICMPES;
2975 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
2976 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
2977 C.CCValid = SystemZ::CCMASK_VCMP;
2978 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2979 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2980 else
2981 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2982 return;
2983 }
2984
2985 // Normalize other comparisons to GT.
2986 bool Swap = false, Invert = false;
2987 switch (C.CCMask) {
2988 case SystemZ::CCMASK_CMP_GT: break;
2989 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
2990 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
2991 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
2992 default: llvm_unreachable("Invalid integer condition!");
2993 }
2994 if (Swap)
2995 std::swap(C.Op0, C.Op1);
2996
2997 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2998 C.Opcode = SystemZISD::UCMP128HI;
2999 else
3000 C.Opcode = SystemZISD::SCMP128HI;
3001 C.CCValid = SystemZ::CCMASK_ANY;
3002 C.CCMask = SystemZ::CCMASK_1;
3003
3004 if (Invert)
3005 C.CCMask ^= C.CCValid;
3006}
3007
3008// See whether the comparison argument contains a redundant AND
3009// and remove it if so. This sometimes happens due to the generic
3010// BRCOND expansion.
3011 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
3012 Comparison &C) {
3013 if (C.Op0.getOpcode() != ISD::AND)
3014 return;
3015 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3016 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3017 return;
3018 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3019 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3020 return;
3021
3022 C.Op0 = C.Op0.getOperand(0);
3023}
3024
3025// Return a Comparison that tests the condition-code result of intrinsic
3026// node Call against constant integer CC using comparison code Cond.
3027// Opcode is the opcode of the SystemZISD operation for the intrinsic
3028// and CCValid is the set of possible condition-code results.
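// The returned mask has one bit per condition code, with bit 3 selecting
// CC 0 and bit 0 selecting CC 3; for example, an equality test against CC 1
// produces the mask 1 << 2, which is then restricted to CCValid.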
3029static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3030 SDValue Call, unsigned CCValid, uint64_t CC,
3031 ISD::CondCode Cond) {
3032 Comparison C(Call, SDValue(), SDValue());
3033 C.Opcode = Opcode;
3034 C.CCValid = CCValid;
3035 if (Cond == ISD::SETEQ)
3036 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3037 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3038 else if (Cond == ISD::SETNE)
3039 // ...and the inverse of that.
3040 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3041 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3042 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3043 // always true for CC>3.
3044 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3045 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3046 // ...and the inverse of that.
3047 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3048 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3049 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3050 // always true for CC>3.
3051 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3052 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3053 // ...and the inverse of that.
3054 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3055 else
3056 llvm_unreachable("Unexpected integer comparison type");
3057 C.CCMask &= CCValid;
3058 return C;
3059}
3060
3061 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3062static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3063 ISD::CondCode Cond, const SDLoc &DL,
3064 SDValue Chain = SDValue(),
3065 bool IsSignaling = false) {
3066 if (CmpOp1.getOpcode() == ISD::Constant) {
3067 assert(!Chain);
3068 unsigned Opcode, CCValid;
3069 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3070 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3071 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3072 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3073 CmpOp1->getAsZExtVal(), Cond);
3074 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3075 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3076 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3077 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3078 CmpOp1->getAsZExtVal(), Cond);
3079 }
3080 Comparison C(CmpOp0, CmpOp1, Chain);
3081 C.CCMask = CCMaskForCondCode(Cond);
3082 if (C.Op0.getValueType().isFloatingPoint()) {
3083 C.CCValid = SystemZ::CCMASK_FCMP;
3084 if (!C.Chain)
3085 C.Opcode = SystemZISD::FCMP;
3086 else if (!IsSignaling)
3087 C.Opcode = SystemZISD::STRICT_FCMP;
3088 else
3089 C.Opcode = SystemZISD::STRICT_FCMPS;
3090 adjustForFNeg(C);
3091 } else {
3092 assert(!C.Chain);
3093 C.CCValid = SystemZ::CCMASK_ICMP;
3094 C.Opcode = SystemZISD::ICMP;
3095 // Choose the type of comparison. Equality and inequality tests can
3096 // use either signed or unsigned comparisons. The choice also doesn't
3097 // matter if both sign bits are known to be clear. In those cases we
3098 // want to give the main isel code the freedom to choose whichever
3099 // form fits best.
3100 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3101 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3102 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3103 C.ICmpType = SystemZICMP::Any;
3104 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3105 C.ICmpType = SystemZICMP::UnsignedOnly;
3106 else
3107 C.ICmpType = SystemZICMP::SignedOnly;
3108 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3109 adjustForRedundantAnd(DAG, DL, C);
3110 adjustZeroCmp(DAG, DL, C);
3111 adjustSubwordCmp(DAG, DL, C);
3112 adjustForSubtraction(DAG, DL, C);
3113 adjustForLTGFR(C);
3114 adjustICmpTruncate(DAG, DL, C);
3115 }
3116
3117 if (shouldSwapCmpOperands(C)) {
3118 std::swap(C.Op0, C.Op1);
3119 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3120 }
3121
3123 adjustICmp128(DAG, DL, C);
3124 return C;
3125}
3126
3127// Emit the comparison instruction described by C.
3128static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3129 if (!C.Op1.getNode()) {
3130 SDNode *Node;
3131 switch (C.Op0.getOpcode()) {
3132 case ISD::INTRINSIC_W_CHAIN:
3133 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3134 return SDValue(Node, 0);
3135 case ISD::INTRINSIC_WO_CHAIN:
3136 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3137 return SDValue(Node, Node->getNumValues() - 1);
3138 default:
3139 llvm_unreachable("Invalid comparison operands");
3140 }
3141 }
3142 if (C.Opcode == SystemZISD::ICMP)
3143 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3144 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3145 if (C.Opcode == SystemZISD::TM) {
3146 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3147 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
3148 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3149 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3150 }
3151 if (C.Opcode == SystemZISD::VICMPES) {
3152 SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32);
3153 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3154 return SDValue(Val.getNode(), 1);
3155 }
3156 if (C.Chain) {
3157 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3158 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3159 }
3160 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3161}
3162
3163// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3164// 64 bits. Extend is the extension type to use. Store the high part
3165// in Hi and the low part in Lo.
3166static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3167 SDValue Op0, SDValue Op1, SDValue &Hi,
3168 SDValue &Lo) {
3169 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3170 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3171 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3172 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3173 DAG.getConstant(32, DL, MVT::i64));
3174 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3175 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3176}
3177
3178// Lower a binary operation that produces two VT results, one in each
3179// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3180// and Opcode performs the GR128 operation. Store the even register result
3181// in Even and the odd register result in Odd.
3182static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3183 unsigned Opcode, SDValue Op0, SDValue Op1,
3184 SDValue &Even, SDValue &Odd) {
3185 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3186 bool Is32Bit = is32Bit(VT);
3187 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3188 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3189}
3190
3191// Return an i32 value that is 1 if the CC value produced by CCReg is
3192// in the mask CCMask and 0 otherwise. CC is known to have a value
3193// in CCValid, so other values can be ignored.
3194static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3195 unsigned CCValid, unsigned CCMask) {
3196 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3197 DAG.getConstant(0, DL, MVT::i32),
3198 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3199 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3200 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3201}
3202
3203 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3204// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3205// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3206// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3207// floating-point comparisons.
3208 static unsigned getVectorComparison(ISD::CondCode CC,
3209 CmpMode Mode) {
3210 switch (CC) {
3211 case ISD::SETOEQ:
3212 case ISD::SETEQ:
3213 switch (Mode) {
3214 case CmpMode::Int: return SystemZISD::VICMPE;
3215 case CmpMode::FP: return SystemZISD::VFCMPE;
3216 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3217 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3218 }
3219 llvm_unreachable("Bad mode");
3220
3221 case ISD::SETOGE:
3222 case ISD::SETGE:
3223 switch (Mode) {
3224 case CmpMode::Int: return 0;
3225 case CmpMode::FP: return SystemZISD::VFCMPHE;
3226 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3227 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3228 }
3229 llvm_unreachable("Bad mode");
3230
3231 case ISD::SETOGT:
3232 case ISD::SETGT:
3233 switch (Mode) {
3234 case CmpMode::Int: return SystemZISD::VICMPH;
3235 case CmpMode::FP: return SystemZISD::VFCMPH;
3236 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3237 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3238 }
3239 llvm_unreachable("Bad mode");
3240
3241 case ISD::SETUGT:
3242 switch (Mode) {
3243 case CmpMode::Int: return SystemZISD::VICMPHL;
3244 case CmpMode::FP: return 0;
3245 case CmpMode::StrictFP: return 0;
3246 case CmpMode::SignalingFP: return 0;
3247 }
3248 llvm_unreachable("Bad mode");
3249
3250 default:
3251 return 0;
3252 }
3253}
3254
3255// Return the SystemZISD vector comparison operation for CC or its inverse,
3256// or 0 if neither can be done directly. Indicate in Invert whether the
3257// result is for the inverse of CC. Mode is as above.
3258 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3259 bool &Invert) {
3260 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3261 Invert = false;
3262 return Opcode;
3263 }
3264
3265 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3266 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3267 Invert = true;
3268 return Opcode;
3269 }
3270
3271 return 0;
3272}
3273
3274// Return a v2f64 that contains the extended form of elements Start and Start+1
3275// of v4f32 value Op. If Chain is nonnull, return the strict form.
3276static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3277 SDValue Op, SDValue Chain) {
3278 int Mask[] = { Start, -1, Start + 1, -1 };
3279 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3280 if (Chain) {
3281 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3282 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3283 }
3284 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3285}
3286
3287// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3288// producing a result of type VT. If Chain is nonnull, return the strict form.
3289SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3290 const SDLoc &DL, EVT VT,
3291 SDValue CmpOp0,
3292 SDValue CmpOp1,
3293 SDValue Chain) const {
3294 // There is no hardware support for v4f32 (unless we have the vector
3295 // enhancements facility 1), so extend the vector into two v2f64s
3296 // and compare those.
3297 if (CmpOp0.getValueType() == MVT::v4f32 &&
3298 !Subtarget.hasVectorEnhancements1()) {
3299 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3300 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3301 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3302 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3303 if (Chain) {
3304 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3305 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3306 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3307 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3308 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3309 H1.getValue(1), L1.getValue(1),
3310 HRes.getValue(1), LRes.getValue(1) };
3311 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3312 SDValue Ops[2] = { Res, NewChain };
3313 return DAG.getMergeValues(Ops, DL);
3314 }
3315 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3316 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3317 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3318 }
3319 if (Chain) {
3320 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3321 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3322 }
3323 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3324}
3325
3326// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3327// an integer mask of type VT. If Chain is nonnull, we have a strict
3328// floating-point comparison. If in addition IsSignaling is true, we have
3329// a strict signaling floating-point comparison.
3330SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3331 const SDLoc &DL, EVT VT,
3332 ISD::CondCode CC,
3333 SDValue CmpOp0,
3334 SDValue CmpOp1,
3335 SDValue Chain,
3336 bool IsSignaling) const {
3337 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3338 assert (!Chain || IsFP);
3339 assert (!IsSignaling || Chain);
3340 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3341 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3342 bool Invert = false;
3343 SDValue Cmp;
3344 switch (CC) {
3345 // Handle tests for order using (or (ogt y x) (oge x y)).
3346 case ISD::SETUO:
3347 Invert = true;
3348 [[fallthrough]];
3349 case ISD::SETO: {
3350 assert(IsFP && "Unexpected integer comparison");
3351 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3352 DL, VT, CmpOp1, CmpOp0, Chain);
3353 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3354 DL, VT, CmpOp0, CmpOp1, Chain);
3355 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3356 if (Chain)
3357 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3358 LT.getValue(1), GE.getValue(1));
3359 break;
3360 }
3361
3362 // Handle <> tests using (or (ogt y x) (ogt x y)).
3363 case ISD::SETUEQ:
3364 Invert = true;
3365 [[fallthrough]];
3366 case ISD::SETONE: {
3367 assert(IsFP && "Unexpected integer comparison");
3368 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3369 DL, VT, CmpOp1, CmpOp0, Chain);
3370 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3371 DL, VT, CmpOp0, CmpOp1, Chain);
3372 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3373 if (Chain)
3374 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3375 LT.getValue(1), GT.getValue(1));
3376 break;
3377 }
3378
3379 // Otherwise a single comparison is enough. It doesn't really
3380 // matter whether we try the inversion or the swap first, since
3381 // there are no cases where both work.
3382 default:
3383 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3384 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3385 else {
3386 CC = ISD::getSetCCSwappedOperands(CC);
3387 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3388 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3389 else
3390 llvm_unreachable("Unhandled comparison");
3391 }
3392 if (Chain)
3393 Chain = Cmp.getValue(1);
3394 break;
3395 }
3396 if (Invert) {
3397 SDValue Mask =
3398 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3399 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3400 }
3401 if (Chain && Chain.getNode() != Cmp.getNode()) {
3402 SDValue Ops[2] = { Cmp, Chain };
3403 Cmp = DAG.getMergeValues(Ops, DL);
3404 }
3405 return Cmp;
3406}
3407
3408SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3409 SelectionDAG &DAG) const {
3410 SDValue CmpOp0 = Op.getOperand(0);
3411 SDValue CmpOp1 = Op.getOperand(1);
3412 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3413 SDLoc DL(Op);
3414 EVT VT = Op.getValueType();
3415 if (VT.isVector())
3416 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3417
3418 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3419 SDValue CCReg = emitCmp(DAG, DL, C);
3420 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3421}
3422
3423SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3424 SelectionDAG &DAG,
3425 bool IsSignaling) const {
3426 SDValue Chain = Op.getOperand(0);
3427 SDValue CmpOp0 = Op.getOperand(1);
3428 SDValue CmpOp1 = Op.getOperand(2);
3429 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3430 SDLoc DL(Op);
3431 EVT VT = Op.getNode()->getValueType(0);
3432 if (VT.isVector()) {
3433 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3434 Chain, IsSignaling);
3435 return Res.getValue(Op.getResNo());
3436 }
3437
3438 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3439 SDValue CCReg = emitCmp(DAG, DL, C);
3440 CCReg->setFlags(Op->getFlags());
3441 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3442 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3443 return DAG.getMergeValues(Ops, DL);
3444}
3445
3446SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3447 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3448 SDValue CmpOp0 = Op.getOperand(2);
3449 SDValue CmpOp1 = Op.getOperand(3);
3450 SDValue Dest = Op.getOperand(4);
3451 SDLoc DL(Op);
3452
3453 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3454 SDValue CCReg = emitCmp(DAG, DL, C);
3455 return DAG.getNode(
3456 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3457 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3458 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3459}
3460
3461// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3462// allowing Pos and Neg to be wider than CmpOp.
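// This matches the select-based absolute-value pattern, e.g.
// (select (setcc x, 0, cc), x, (sub 0, x)), possibly with x sign-extended.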
3463static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3464 return (Neg.getOpcode() == ISD::SUB &&
3465 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3466 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3467 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3468 Pos.getOperand(0) == CmpOp)));
3469}
3470
3471// Return the absolute or negative absolute of Op; IsNegative decides which.
3472 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3473 bool IsNegative) {
3474 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3475 if (IsNegative)
3476 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3477 DAG.getConstant(0, DL, Op.getValueType()), Op);
3478 return Op;
3479}
3480
3481SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3482 SelectionDAG &DAG) const {
3483 SDValue CmpOp0 = Op.getOperand(0);
3484 SDValue CmpOp1 = Op.getOperand(1);
3485 SDValue TrueOp = Op.getOperand(2);
3486 SDValue FalseOp = Op.getOperand(3);
3487 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3488 SDLoc DL(Op);
3489
3490 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3491
3492 // Check for absolute and negative-absolute selections, including those
3493 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3494 // This check supplements the one in DAGCombiner.
3495 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3496 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3497 C.Op1.getOpcode() == ISD::Constant &&
3498 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3499 C.Op1->getAsZExtVal() == 0) {
3500 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3501 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3502 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3503 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3504 }
3505
3506 SDValue CCReg = emitCmp(DAG, DL, C);
3507 SDValue Ops[] = {TrueOp, FalseOp,
3508 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3509 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3510
3511 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3512}
3513
3514SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3515 SelectionDAG &DAG) const {
3516 SDLoc DL(Node);
3517 const GlobalValue *GV = Node->getGlobal();
3518 int64_t Offset = Node->getOffset();
3519 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3520 SDValue Result;
3521
3522 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3523 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3524 if (isInt<32>(Offset)) {
3525 // Assign anchors at 1<<12 byte boundaries.
3526 uint64_t Anchor = Offset & ~uint64_t(0xfff);
3527 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3528 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3529
3530 // The offset can be folded into the address if it is aligned to a
3531 // halfword.
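// For example, an offset of 0x1002 keeps the 0x1000 anchor and folds the
// remaining offset of 2 into a PCREL_OFFSET node, since 2 is a multiple of
// a halfword.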
3532 Offset -= Anchor;
3533 if (Offset != 0 && (Offset & 1) == 0) {
3534 SDValue Full =
3535 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3536 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3537 Offset = 0;
3538 }
3539 } else {
3540 // Conservatively load a constant offset greater than 32 bits into a
3541 // register below.
3542 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3543 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3544 }
3545 } else if (Subtarget.isTargetELF()) {
3546 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3547 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3548 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3549 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3550 } else if (Subtarget.isTargetzOS()) {
3551 Result = getADAEntry(DAG, GV, DL, PtrVT);
3552 } else
3553 llvm_unreachable("Unexpected Subtarget");
3554
3555 // If there was a non-zero offset that we didn't fold, create an explicit
3556 // addition for it.
3557 if (Offset != 0)
3558 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3559 DAG.getConstant(Offset, DL, PtrVT));
3560
3561 return Result;
3562}
3563
3564SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3565 SelectionDAG &DAG,
3566 unsigned Opcode,
3567 SDValue GOTOffset) const {
3568 SDLoc DL(Node);
3569 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3570 SDValue Chain = DAG.getEntryNode();
3571 SDValue Glue;
3572
3573 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3574 CallingConv::GHC)
3575 report_fatal_error("In GHC calling convention TLS is not supported");
3576
3577 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3578 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3579 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3580 Glue = Chain.getValue(1);
3581 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3582 Glue = Chain.getValue(1);
3583
3584 // The first call operand is the chain and the second is the TLS symbol.
3585 SmallVector<SDValue, 8> Ops;
3586 Ops.push_back(Chain);
3587 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3588 Node->getValueType(0),
3589 0, 0));
3590
3591 // Add argument registers to the end of the list so that they are
3592 // known live into the call.
3593 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3594 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3595
3596 // Add a register mask operand representing the call-preserved registers.
3597 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3598 const uint32_t *Mask =
3599 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3600 assert(Mask && "Missing call preserved mask for calling convention");
3601 Ops.push_back(DAG.getRegisterMask(Mask));
3602
3603 // Glue the call to the argument copies.
3604 Ops.push_back(Glue);
3605
3606 // Emit the call.
3607 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3608 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3609 Glue = Chain.getValue(1);
3610
3611 // Copy the return value from %r2.
3612 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3613}
3614
3615SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3616 SelectionDAG &DAG) const {
3617 SDValue Chain = DAG.getEntryNode();
3618 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3619
3620 // The high part of the thread pointer is in access register 0.
3621 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3622 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3623
3624 // The low part of the thread pointer is in access register 1.
3625 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3626 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3627
3628 // Merge them into a single 64-bit address.
3629 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3630 DAG.getConstant(32, DL, PtrVT));
3631 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3632}
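// Hedged note on the sequence above: z/Architecture keeps the 64-bit thread
// pointer split across access registers %a0 (high word) and %a1 (low word),
// so this typically selects to something like
//   ear %rN,%a0 ; sllg %rN,%rN,32 ; ear %rN,%a1
// with the actual GPR chosen by the register allocator.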
3633
3634SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3635 SelectionDAG &DAG) const {
3636 if (DAG.getTarget().useEmulatedTLS())
3637 return LowerToTLSEmulatedModel(Node, DAG);
3638 SDLoc DL(Node);
3639 const GlobalValue *GV = Node->getGlobal();
3640 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3641 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3642
3643 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3644 CallingConv::GHC)
3645 report_fatal_error("In GHC calling convention TLS is not supported");
3646
3647 SDValue TP = lowerThreadPointer(DL, DAG);
3648
3649 // Get the offset of GA from the thread pointer, based on the TLS model.
3650 SDValue Offset;
3651 switch (model) {
3652 case TLSModel::GeneralDynamic: {
3653 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3654 SystemZConstantPoolValue *CPV =
3655 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3656
3657 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3658 Offset = DAG.getLoad(
3659 PtrVT, DL, DAG.getEntryNode(), Offset,
3660 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3661
3662 // Call __tls_get_offset to retrieve the offset.
3663 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3664 break;
3665 }
3666
3667 case TLSModel::LocalDynamic: {
3668 // Load the GOT offset of the module ID.
3669 SystemZConstantPoolValue *CPV =
3670 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3671
3672 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3673 Offset = DAG.getLoad(
3674 PtrVT, DL, DAG.getEntryNode(), Offset,
3675 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3676
3677 // Call __tls_get_offset to retrieve the module base offset.
3678 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3679
3680 // Note: The SystemZLDCleanupPass will remove redundant computations
3681 // of the module base offset. Count total number of local-dynamic
3682 // accesses to trigger execution of that pass.
3683 SystemZMachineFunctionInfo* MFI =
3684 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3685 MFI->incNumLocalDynamicTLSAccesses();
3686
3687 // Add the per-symbol offset.
3688 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3689
3690 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3691 DTPOffset = DAG.getLoad(
3692 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3693 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3694
3695 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3696 break;
3697 }
3698
3699 case TLSModel::InitialExec: {
3700 // Load the offset from the GOT.
3701 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3702 SystemZII::MO_INDNTPOFF);
3703 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3704 Offset =
3705 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3706 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3707 break;
3708 }
3709
3710 case TLSModel::LocalExec: {
3711 // Force the offset into the constant pool and load it from there.
3712 SystemZConstantPoolValue *CPV =
3713 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3714
3715 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3716 Offset = DAG.getLoad(
3717 PtrVT, DL, DAG.getEntryNode(), Offset,
3718 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3719 break;
3720 }
3721 }
3722
3723 // Add the base and offset together.
3724 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3725}
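// Hedged end-to-end sketch: in the general-dynamic model the per-symbol
// tls_index GOT offset is loaded from the constant pool, passed to
// __tls_get_offset, and the returned offset is added to the thread pointer;
// the other models differ only in how Offset is produced (module base plus
// DTPOFF, a GOT slot, or a constant-pool NTPOFF value).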
3726
3727SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3728 SelectionDAG &DAG) const {
3729 SDLoc DL(Node);
3730 const BlockAddress *BA = Node->getBlockAddress();
3731 int64_t Offset = Node->getOffset();
3732 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3733
3734 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3735 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3736 return Result;
3737}
3738
3739SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3740 SelectionDAG &DAG) const {
3741 SDLoc DL(JT);
3742 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3743 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3744
3745 // Use LARL to load the address of the table.
3746 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3747}
3748
3749SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3750 SelectionDAG &DAG) const {
3751 SDLoc DL(CP);
3752 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3753
3754 SDValue Result;
3755 if (CP->isMachineConstantPoolEntry())
3756 Result =
3757 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3758 else
3759 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3760 CP->getOffset());
3761
3762 // Use LARL to load the address of the constant pool entry.
3763 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3764}
3765
3766SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3767 SelectionDAG &DAG) const {
3768 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
3769 MachineFunction &MF = DAG.getMachineFunction();
3770 MachineFrameInfo &MFI = MF.getFrameInfo();
3771 MFI.setFrameAddressIsTaken(true);
3772
3773 SDLoc DL(Op);
3774 unsigned Depth = Op.getConstantOperandVal(0);
3775 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3776
3777 // By definition, the frame address is the address of the back chain. (In
3778 // the case of packed stack without backchain, return the address where the
3779 // backchain would have been stored. This will either be an unused space or
3780 // contain a saved register).
3781 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3782 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3783
3784 if (Depth > 0) {
3785 // FIXME The frontend should detect this case.
3786 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3787 report_fatal_error("Unsupported stack frame traversal count");
3788
3789 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
3790 while (Depth--) {
3791 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
3792 MachinePointerInfo());
3793 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
3794 }
3795 }
3796
3797 return BackChain;
3798}
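// Usage sketch (assumption, not taken from this file): @llvm.frameaddress(0)
// simply yields the back-chain slot address of the current frame, while a
// depth N > 0 loads the saved back chain N times, which is only meaningful
// when the back chain is actually maintained (e.g. -mbackchain).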
3799
3800SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3801 SelectionDAG &DAG) const {
3802 MachineFunction &MF = DAG.getMachineFunction();
3803 MachineFrameInfo &MFI = MF.getFrameInfo();
3804 MFI.setReturnAddressIsTaken(true);
3805
3806 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3807 return SDValue();
3808
3809 SDLoc DL(Op);
3810 unsigned Depth = Op.getConstantOperandVal(0);
3811 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3812
3813 if (Depth > 0) {
3814 // FIXME The frontend should detect this case.
3815 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3816 report_fatal_error("Unsupported stack frame traversal count");
3817
3818 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3819 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
3820 int Offset = TFL->getReturnAddressOffset(MF);
3821 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
3822 DAG.getConstant(Offset, DL, PtrVT));
3823 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
3824 MachinePointerInfo());
3825 }
3826
3827 // Return R14D (ELF) / R7D (XPLINK), which holds the return address. Mark it an
3828 // implicit live-in.
3829 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
3830 Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(),
3831 &SystemZ::GR64BitRegClass);
3832 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3833}
3834
3835SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3836 SelectionDAG &DAG) const {
3837 SDLoc DL(Op);
3838 SDValue In = Op.getOperand(0);
3839 EVT InVT = In.getValueType();
3840 EVT ResVT = Op.getValueType();
3841
3842 // Convert loads directly. This is normally done by DAGCombiner,
3843 // but we need this case for bitcasts that are created during lowering
3844 // and which are then lowered themselves.
3845 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3846 if (ISD::isNormalLoad(LoadN)) {
3847 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3848 LoadN->getBasePtr(), LoadN->getMemOperand());
3849 // Update the chain uses.
3850 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3851 return NewLoad;
3852 }
3853
3854 if (InVT == MVT::i32 && ResVT == MVT::f32) {
3855 SDValue In64;
3856 if (Subtarget.hasHighWord()) {
3857 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3858 MVT::i64);
3859 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3860 MVT::i64, SDValue(U64, 0), In);
3861 } else {
3862 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3863 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3864 DAG.getConstant(32, DL, MVT::i64));
3865 }
3866 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3867 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3868 DL, MVT::f32, Out64);
3869 }
3870 if (InVT == MVT::f32 && ResVT == MVT::i32) {
3871 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3872 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3873 MVT::f64, SDValue(U64, 0), In);
3874 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3875 if (Subtarget.hasHighWord())
3876 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3877 MVT::i32, Out64);
3878 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3879 DAG.getConstant(32, DL, MVT::i64));
3880 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3881 }
3882 llvm_unreachable("Unexpected bitcast combination");
3883}
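// Minimal sketch of the two paths above: a 32-bit value crosses between the
// integer and floating-point domains through the high 32-bit subregister of
// a 64-bit value, because short FP values live in the leftmost word of an
// FPR; without high-word support this is emulated with a 32-bit shift around
// an i64 <-> f64 bitcast.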
3884
3885SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3886 SelectionDAG &DAG) const {
3887
3888 if (Subtarget.isTargetXPLINK64())
3889 return lowerVASTART_XPLINK(Op, DAG);
3890 else
3891 return lowerVASTART_ELF(Op, DAG);
3892}
3893
3894SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
3895 SelectionDAG &DAG) const {
3896 MachineFunction &MF = DAG.getMachineFunction();
3897 SystemZMachineFunctionInfo *FuncInfo =
3898 MF.getInfo<SystemZMachineFunctionInfo>();
3899
3900 SDLoc DL(Op);
3901
3902 // vastart just stores the address of the VarArgsFrameIndex slot into the
3903 // memory location argument.
3904 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3905 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3906 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3907 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
3908 MachinePointerInfo(SV));
3909}
3910
3911SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
3912 SelectionDAG &DAG) const {
3913 MachineFunction &MF = DAG.getMachineFunction();
3914 SystemZMachineFunctionInfo *FuncInfo =
3915 MF.getInfo<SystemZMachineFunctionInfo>();
3916 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3917
3918 SDValue Chain = Op.getOperand(0);
3919 SDValue Addr = Op.getOperand(1);
3920 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3921 SDLoc DL(Op);
3922
3923 // The initial values of each field.
3924 const unsigned NumFields = 4;
3925 SDValue Fields[NumFields] = {
3926 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3927 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3928 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3929 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3930 };
3931
3932 // Store each field into its respective slot.
3933 SDValue MemOps[NumFields];
3934 unsigned Offset = 0;
3935 for (unsigned I = 0; I < NumFields; ++I) {
3936 SDValue FieldAddr = Addr;
3937 if (Offset != 0)
3938 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3939 DAG.getIntPtrConstant(Offset, DL));
3940 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3941 MachinePointerInfo(SV, Offset));
3942 Offset += 8;
3943 }
3944 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3945}
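// For reference (SystemZ ELF ABI layout, stated as an assumption rather than
// taken from this file), the four stores above populate a va_list of the form
//   struct __va_list_tag {
//     long __gpr;                // next general register argument number
//     long __fpr;                // next FP register argument number
//     void *__overflow_arg_area; // start of stack-passed arguments
//     void *__reg_save_area;     // register argument spill area
//   };
// which is why four 8-byte fields are written at offsets 0, 8, 16 and 24.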
3946
3947SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3948 SelectionDAG &DAG) const {
3949 SDValue Chain = Op.getOperand(0);
3950 SDValue DstPtr = Op.getOperand(1);
3951 SDValue SrcPtr = Op.getOperand(2);
3952 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3953 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3954 SDLoc DL(Op);
3955
3956 uint32_t Sz =
3957 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
3958 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
3959 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3960 /*isTailCall*/ false, MachinePointerInfo(DstSV),
3961 MachinePointerInfo(SrcSV));
3962}
3963
3964SDValue
3965SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
3966 SelectionDAG &DAG) const {
3967 if (Subtarget.isTargetXPLINK64())
3968 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
3969 else
3970 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
3971}
3972
3973SDValue
3974SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
3975 SelectionDAG &DAG) const {
3976 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3977 MachineFunction &MF = DAG.getMachineFunction();
3978 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3979 SDValue Chain = Op.getOperand(0);
3980 SDValue Size = Op.getOperand(1);
3981 SDValue Align = Op.getOperand(2);
3982 SDLoc DL(Op);
3983
3984 // If the user has set the no-realign-stack function attribute, ignore
3985 // alloca alignments.
3986 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
3987
3988 uint64_t StackAlign = TFI->getStackAlignment();
3989 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3990 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3991
3992 SDValue NeededSpace = Size;
3993
3994 // Add extra space for alignment if needed.
3995 EVT PtrVT = getPointerTy(MF.getDataLayout());
3996 if (ExtraAlignSpace)
3997 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
3998 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3999
4000 bool IsSigned = false;
4001 bool DoesNotReturn = false;
4002 bool IsReturnValueUsed = false;
4003 EVT VT = Op.getValueType();
4004 SDValue AllocaCall =
4005 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4006 CallingConv::C, IsSigned, DL, DoesNotReturn,
4007 IsReturnValueUsed)
4008 .first;
4009
4010 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4011 // to end of call in order to ensure it isn't broken up from the call
4012 // sequence.
4013 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4014 Register SPReg = Regs.getStackPointerRegister();
4015 Chain = AllocaCall.getValue(1);
4016 SDValue Glue = AllocaCall.getValue(2);
4017 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4018 Chain = NewSPRegNode.getValue(1);
4019
4020 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4021 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4022 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4023
4024 // Dynamically realign if needed.
4025 if (ExtraAlignSpace) {
4026 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4027 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4028 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4029 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4030 }
4031
4032 SDValue Ops[2] = {Result, Chain};
4033 return DAG.getMergeValues(Ops, DL);
4034}
4035
4036SDValue
4037SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4038 SelectionDAG &DAG) const {
4039 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4040 MachineFunction &MF = DAG.getMachineFunction();
4041 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4042 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4043
4044 SDValue Chain = Op.getOperand(0);
4045 SDValue Size = Op.getOperand(1);
4046 SDValue Align = Op.getOperand(2);
4047 SDLoc DL(Op);
4048
4049 // If the user has set the no-realign-stack function attribute, ignore
4050 // alloca alignments.
4051 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4052
4053 uint64_t StackAlign = TFI->getStackAlignment();
4054 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4055 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4056
4057 Register SPReg = getStackPointerRegisterToSaveRestore();
4058 SDValue NeededSpace = Size;
4059
4060 // Get a reference to the stack pointer.
4061 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4062
4063 // If we need a backchain, save it now.
4064 SDValue Backchain;
4065 if (StoreBackchain)
4066 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4067 MachinePointerInfo());
4068
4069 // Add extra space for alignment if needed.
4070 if (ExtraAlignSpace)
4071 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4072 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4073
4074 // Get the new stack pointer value.
4075 SDValue NewSP;
4076 if (hasInlineStackProbe(MF)) {
4077 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4078 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4079 Chain = NewSP.getValue(1);
4080 }
4081 else {
4082 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4083 // Copy the new stack pointer back.
4084 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4085 }
4086
4087 // The allocated data lives above the 160 bytes allocated for the standard
4088 // frame, plus any outgoing stack arguments. We don't know how much that
4089 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4090 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4091 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4092
4093 // Dynamically realign if needed.
4094 if (RequiredAlign > StackAlign) {
4095 Result =
4096 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4097 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4098 Result =
4099 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4100 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4101 }
4102
4103 if (StoreBackchain)
4104 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4105 MachinePointerInfo());
4106
4107 SDValue Ops[2] = { Result, Chain };
4108 return DAG.getMergeValues(Ops, DL);
4109}
4110
4111SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4112 SDValue Op, SelectionDAG &DAG) const {
4113 SDLoc DL(Op);
4114
4115 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4116}
4117
4118SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4119 SelectionDAG &DAG) const {
4120 EVT VT = Op.getValueType();
4121 SDLoc DL(Op);
4122 SDValue Ops[2];
4123 if (is32Bit(VT))
4124 // Just do a normal 64-bit multiplication and extract the results.
4125 // We define this so that it can be used for constant division.
4126 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4127 Op.getOperand(1), Ops[1], Ops[0]);
4128 else if (Subtarget.hasMiscellaneousExtensions2())
4129 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4130 // the high result in the even register. ISD::SMUL_LOHI is defined to
4131 // return the low half first, so the results are in reverse order.
4132 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4133 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4134 else {
4135 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4136 //
4137 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4138 //
4139 // but using the fact that the upper halves are either all zeros
4140 // or all ones:
4141 //
4142 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4143 //
4144 // and grouping the right terms together since they are quicker than the
4145 // multiplication:
4146 //
4147 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
4148 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4149 SDValue LL = Op.getOperand(0);
4150 SDValue RL = Op.getOperand(1);
4151 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4152 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4153 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4154 // the high result in the even register. ISD::SMUL_LOHI is defined to
4155 // return the low half first, so the results are in reverse order.
4156 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4157 LL, RL, Ops[1], Ops[0]);
4158 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4159 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4160 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4161 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4162 }
4163 return DAG.getMergeValues(Ops, DL);
4164}
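// Worked justification for the fallback above (illustrative): writing each
// signed operand as its low half plus a sign word (lh, rh are 0 or all ones),
// the product modulo 2^128 is ll*rl + ((lh*rl + ll*rh) << 64), and because
// lh and rh are 0 or -1, lh*rl == -(lh & rl) and ll*rh == -(ll & rh). That
// yields the (ll*rl) - (((lh & rl) + (ll & rh)) << 64) form computed here
// with two arithmetic shifts, one UMUL_LOHI, two ANDs, an ADD and a SUB.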
4165
4166SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4167 SelectionDAG &DAG) const {
4168 EVT VT = Op.getValueType();
4169 SDLoc DL(Op);
4170 SDValue Ops[2];
4171 if (is32Bit(VT))
4172 // Just do a normal 64-bit multiplication and extract the results.
4173 // We define this so that it can be used for constant division.
4174 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4175 Op.getOperand(1), Ops[1], Ops[0]);
4176 else
4177 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4178 // the high result in the even register. ISD::UMUL_LOHI is defined to
4179 // return the low half first, so the results are in reverse order.
4180 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4181 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4182 return DAG.getMergeValues(Ops, DL);
4183}
4184
4185SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4186 SelectionDAG &DAG) const {
4187 SDValue Op0 = Op.getOperand(0);
4188 SDValue Op1 = Op.getOperand(1);
4189 EVT VT = Op.getValueType();
4190 SDLoc DL(Op);
4191
4192 // We use DSGF for 32-bit division. This means the first operand must
4193 // always be 64-bit, and the second operand should be 32-bit whenever
4194 // that is possible, to improve performance.
4195 if (is32Bit(VT))
4196 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4197 else if (DAG.ComputeNumSignBits(Op1) > 32)
4198 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4199
4200 // DSG(F) returns the remainder in the even register and the
4201 // quotient in the odd register.
4202 SDValue Ops[2];
4203 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4204 return DAG.getMergeValues(Ops, DL);
4205}
4206
4207SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4208 SelectionDAG &DAG) const {
4209 EVT VT = Op.getValueType();
4210 SDLoc DL(Op);
4211
4212 // DL(G) returns the remainder in the even register and the
4213 // quotient in the odd register.
4214 SDValue Ops[2];
4215 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4216 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4217 return DAG.getMergeValues(Ops, DL);
4218}
4219
4220SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4221 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4222
4223 // Get the known-zero masks for each operand.
4224 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4225 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4226 DAG.computeKnownBits(Ops[1])};
4227
4228 // See if the upper 32 bits of one operand and the lower 32 bits of the
4229 // other are known zero. They are the low and high operands respectively.
4230 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4231 Known[1].Zero.getZExtValue() };
4232 unsigned High, Low;
4233 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4234 High = 1, Low = 0;
4235 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4236 High = 0, Low = 1;
4237 else
4238 return Op;
4239
4240 SDValue LowOp = Ops[Low];
4241 SDValue HighOp = Ops[High];
4242
4243 // If the high part is a constant, we're better off using IILH.
4244 if (HighOp.getOpcode() == ISD::Constant)
4245 return Op;
4246
4247 // If the low part is a constant that is outside the range of LHI,
4248 // then we're better off using IILF.
4249 if (LowOp.getOpcode() == ISD::Constant) {
4250 int64_t Value = int32_t(LowOp->getAsZExtVal());
4251 if (!isInt<16>(Value))
4252 return Op;
4253 }
4254
4255 // Check whether the high part is an AND that doesn't change the
4256 // high 32 bits and just masks out low bits. We can skip it if so.
4257 if (HighOp.getOpcode() == ISD::AND &&
4258 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4259 SDValue HighOp0 = HighOp.getOperand(0);
4260 uint64_t Mask = HighOp.getConstantOperandVal(1);
4261 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4262 HighOp = HighOp0;
4263 }
4264
4265 // Take advantage of the fact that all GR32 operations only change the
4266 // low 32 bits by truncating Low to an i32 and inserting it directly
4267 // using a subreg. The interesting cases are those where the truncation
4268 // can be folded.
4269 SDLoc DL(Op);
4270 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4271 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4272 MVT::i64, HighOp, Low32);
4273}
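// Hedged example: for (or (and X, 0xffffffff00000000), (zext i32 Y)), the
// zero-extended Y supplies the low word and X the high word, so the OR is
// rewritten as an insertion of trunc(Y) into the low 32-bit subregister of
// X, which can then be selected as a plain 32-bit GR operation that leaves
// the high word untouched.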
4274
4275// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4276SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4277 SelectionDAG &DAG) const {
4278 SDNode *N = Op.getNode();
4279 SDValue LHS = N->getOperand(0);
4280 SDValue RHS = N->getOperand(1);
4281 SDLoc DL(N);
4282
4283 if (N->getValueType(0) == MVT::i128) {
4284 unsigned BaseOp = 0;
4285 unsigned FlagOp = 0;
4286 bool IsBorrow = false;
4287 switch (Op.getOpcode()) {
4288 default: llvm_unreachable("Unknown instruction!");
4289 case ISD::UADDO:
4290 BaseOp = ISD::ADD;
4291 FlagOp = SystemZISD::VACC;
4292 break;
4293 case ISD::USUBO:
4294 BaseOp = ISD::SUB;
4295 FlagOp = SystemZISD::VSCBI;
4296 IsBorrow = true;
4297 break;
4298 }
4299 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4300 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4301 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4302 DAG.getValueType(MVT::i1));
4303 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4304 if (IsBorrow)
4305 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4306 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4307 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4308 }
4309
4310 unsigned BaseOp = 0;
4311 unsigned CCValid = 0;
4312 unsigned CCMask = 0;
4313
4314 switch (Op.getOpcode()) {
4315 default: llvm_unreachable("Unknown instruction!");
4316 case ISD::SADDO:
4317 BaseOp = SystemZISD::SADDO;
4318 CCValid = SystemZ::CCMASK_ARITH;
4319 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4320 break;
4321 case ISD::SSUBO:
4322 BaseOp = SystemZISD::SSUBO;
4323 CCValid = SystemZ::CCMASK_ARITH;
4324 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4325 break;
4326 case ISD::UADDO:
4327 BaseOp = SystemZISD::UADDO;
4328 CCValid = SystemZ::CCMASK_LOGICAL;
4329 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4330 break;
4331 case ISD::USUBO:
4332 BaseOp = SystemZISD::USUBO;
4333 CCValid = SystemZ::CCMASK_LOGICAL;
4334 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4335 break;
4336 }
4337
4338 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4339 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4340
4341 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4342 if (N->getValueType(1) == MVT::i1)
4343 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4344
4345 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4346}
4347
4348static bool isAddCarryChain(SDValue Carry) {
4349 while (Carry.getOpcode() == ISD::UADDO_CARRY)
4350 Carry = Carry.getOperand(2);
4351 return Carry.getOpcode() == ISD::UADDO;
4352}
4353
4354static bool isSubBorrowChain(SDValue Carry) {
4355 while (Carry.getOpcode() == ISD::USUBO_CARRY)
4356 Carry = Carry.getOperand(2);
4357 return Carry.getOpcode() == ISD::USUBO;
4358}
4359
4360// Lower UADDO_CARRY/USUBO_CARRY nodes.
4361SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4362 SelectionDAG &DAG) const {
4363
4364 SDNode *N = Op.getNode();
4365 MVT VT = N->getSimpleValueType(0);
4366
4367 // Let legalize expand this if it isn't a legal type yet.
4368 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4369 return SDValue();
4370
4371 SDValue LHS = N->getOperand(0);
4372 SDValue RHS = N->getOperand(1);
4373 SDValue Carry = Op.getOperand(2);
4374 SDLoc DL(N);
4375
4376 if (VT == MVT::i128) {
4377 unsigned BaseOp = 0;
4378 unsigned FlagOp = 0;
4379 bool IsBorrow = false;
4380 switch (Op.getOpcode()) {
4381 default: llvm_unreachable("Unknown instruction!");
4382 case ISD::UADDO_CARRY:
4383 BaseOp = SystemZISD::VAC;
4384 FlagOp = SystemZISD::VACCC;
4385 break;
4386 case ISD::USUBO_CARRY:
4387 BaseOp = SystemZISD::VSBI;
4388 FlagOp = SystemZISD::VSBCBI;
4389 IsBorrow = true;
4390 break;
4391 }
4392 if (IsBorrow)
4393 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4394 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4395 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4396 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4397 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4398 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4399 DAG.getValueType(MVT::i1));
4400 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4401 if (IsBorrow)
4402 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4403 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4404 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4405 }
4406
4407 unsigned BaseOp = 0;
4408 unsigned CCValid = 0;
4409 unsigned CCMask = 0;
4410
4411 switch (Op.getOpcode()) {
4412 default: llvm_unreachable("Unknown instruction!");
4413 case ISD::UADDO_CARRY:
4414 if (!isAddCarryChain(Carry))
4415 return SDValue();
4416
4417 BaseOp = SystemZISD::ADDCARRY;
4418 CCValid = SystemZ::CCMASK_LOGICAL;
4419 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4420 break;
4421 case ISD::USUBO_CARRY:
4422 if (!isSubBorrowChain(Carry))
4423 return SDValue();
4424
4425 BaseOp = SystemZISD::SUBCARRY;
4426 CCValid = SystemZ::CCMASK_LOGICAL;
4427 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4428 break;
4429 }
4430
4431 // Set the condition code from the carry flag.
4432 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4433 DAG.getConstant(CCValid, DL, MVT::i32),
4434 DAG.getConstant(CCMask, DL, MVT::i32));
4435
4436 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4437 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4438
4439 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4440 if (N->getValueType(1) == MVT::i1)
4441 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4442
4443 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4444}
4445
4446SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4447 SelectionDAG &DAG) const {
4448 EVT VT = Op.getValueType();
4449 SDLoc DL(Op);
4450 Op = Op.getOperand(0);
4451
4452 if (VT.getScalarSizeInBits() == 128) {
4453 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4454 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4455 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4456 DAG.getConstant(0, DL, MVT::i64));
4457 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4458 return Op;
4459 }
4460
4461 // Handle vector types via VPOPCT.
4462 if (VT.isVector()) {
4463 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4464 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4465 switch (VT.getScalarSizeInBits()) {
4466 case 8:
4467 break;
4468 case 16: {
4469 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4470 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4471 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4472 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4473 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4474 break;
4475 }
4476 case 32: {
4477 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4478 DAG.getConstant(0, DL, MVT::i32));
4479 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4480 break;
4481 }
4482 case 64: {
4483 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4484 DAG.getConstant(0, DL, MVT::i32));
4485 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4486 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4487 break;
4488 }
4489 default:
4490 llvm_unreachable("Unexpected type");
4491 }
4492 return Op;
4493 }
4494
4495 // Get the known-zero mask for the operand.
4496 KnownBits Known = DAG.computeKnownBits(Op);
4497 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
4498 if (NumSignificantBits == 0)
4499 return DAG.getConstant(0, DL, VT);
4500
4501 // Skip known-zero high parts of the operand.
4502 int64_t OrigBitSize = VT.getSizeInBits();
4503 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
4504 BitSize = std::min(BitSize, OrigBitSize);
4505
4506 // The POPCNT instruction counts the number of bits in each byte.
4507 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
4508 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
4509 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4510
4511 // Add up per-byte counts in a binary tree. All bits of Op at
4512 // position larger than BitSize remain zero throughout.
4513 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
4514 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
4515 if (BitSize != OrigBitSize)
4516 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
4517 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
4518 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4519 }
4520
4521 // Extract overall result from high byte.
4522 if (BitSize > 8)
4523 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4524 DAG.getConstant(BitSize - 8, DL, VT));
4525
4526 return Op;
4527}
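// Worked example (illustrative): for a 64-bit scalar, POPCNT leaves each
// byte's population count in that byte; adding the value to itself shifted
// left by 32, 16 and 8 accumulates all eight byte counts into the most
// significant byte, and the final SRL by BitSize - 8 (56 here) extracts the
// total count.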
4528
4529SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4530 SelectionDAG &DAG) const {
4531 SDLoc DL(Op);
4532 AtomicOrdering FenceOrdering =
4533 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4534 SyncScope::ID FenceSSID =
4535 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4536
4537 // The only fence that needs an instruction is a sequentially-consistent
4538 // cross-thread fence.
4539 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4540 FenceSSID == SyncScope::System) {
4541 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
4542 Op.getOperand(0)),
4543 0);
4544 }
4545
4546 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4547 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4548}
4549
4550SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
4551 SelectionDAG &DAG) const {
4552 auto *Node = cast<AtomicSDNode>(Op.getNode());
4553 assert(Node->getMemoryVT() == MVT::i128 && "Only custom lowering i128.");
4554 // Use same code to handle both legal and non-legal i128 types.
4557 return DAG.getMergeValues(Results, SDLoc(Op));
4558}
4559
4560// Prepare for a Compare And Swap for a subword operation. This needs to be
4561// done in memory with 4 bytes at natural alignment.
4562 static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
4563 SDValue &AlignedAddr, SDValue &BitShift,
4564 SDValue &NegBitShift) {
4565 EVT PtrVT = Addr.getValueType();
4566 EVT WideVT = MVT::i32;
4567
4568 // Get the address of the containing word.
4569 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4570 DAG.getConstant(-4, DL, PtrVT));
4571
4572 // Get the number of bits that the word must be rotated left in order
4573 // to bring the field to the top bits of a GR32.
4574 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4575 DAG.getConstant(3, DL, PtrVT));
4576 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4577
4578 // Get the complementing shift amount, for rotating a field in the top
4579 // bits back to its proper position.
4580 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4581 DAG.getConstant(0, DL, WideVT), BitShift);
4582
4583}
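// Illustrative example (assumed address): for a halfword at 0x1002 the
// containing word is at AlignedAddr == 0x1000 and the field occupies the low
// 16 bits of that big-endian word, so the low bits of BitShift are
// (0x1002 & 3) * 8 == 16, the left-rotate amount that brings the field to
// the top of a GR32, and NegBitShift rotates it back again.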
4584
4585// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
4586// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
4587SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
4588 SelectionDAG &DAG,
4589 unsigned Opcode) const {
4590 auto *Node = cast<AtomicSDNode>(Op.getNode());
4591
4592 // 32-bit operations need no special handling.
4593 EVT NarrowVT = Node->getMemoryVT();
4594 EVT WideVT = MVT::i32;
4595 if (NarrowVT == WideVT)
4596 return Op;
4597
4598 int64_t BitSize = NarrowVT.getSizeInBits();
4599 SDValue ChainIn = Node->getChain();
4600 SDValue Addr = Node->getBasePtr();
4601 SDValue Src2 = Node->getVal();
4602 MachineMemOperand *MMO = Node->getMemOperand();
4603 SDLoc DL(Node);
4604
4605 // Convert atomic subtracts of constants into additions.
4606 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4607 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4608 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
4609 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
4610 }
4611
4612 SDValue AlignedAddr, BitShift, NegBitShift;
4613 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4614
4615 // Extend the source operand to 32 bits and prepare it for the inner loop.
4616 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
4617 // operations require the source to be shifted in advance. (This shift
4618 // can be folded if the source is constant.) For AND and NAND, the lower
4619 // bits must be set, while for other opcodes they should be left clear.
4620 if (Opcode != SystemZISD::ATOMIC_SWAPW)
4621 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
4622 DAG.getConstant(32 - BitSize, DL, WideVT));
4623 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
4624 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
4625 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
4626 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
4627
4628 // Construct the ATOMIC_LOADW_* node.
4629 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
4630 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4631 DAG.getConstant(BitSize, DL, WideVT) };
4632 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4633 NarrowVT, MMO);
4634
4635 // Rotate the result of the final CS so that the field is in the lower
4636 // bits of a GR32, then truncate it.
4637 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
4638 DAG.getConstant(BitSize, DL, WideVT));
4639 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
4640
4641 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
4642 return DAG.getMergeValues(RetOps, DL);
4643}
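// Hedged summary of the expansion above: the 8- or 16-bit value is updated
// inside its naturally aligned 32-bit container by an ATOMIC_LOADW_* node
// (which later becomes a compare-and-swap loop), and the final ROTL by
// BitShift + BitSize brings the addressed field back into the low bits of
// the 32-bit result, i.e. the old value that atomicrmw is defined to return.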
4644
4645// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
4646// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
4647SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
4648 SelectionDAG &DAG) const {
4649 auto *Node = cast<AtomicSDNode>(Op.getNode());
4650 EVT MemVT = Node->getMemoryVT();
4651 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
4652 // A full-width operation: negate and use LAA(G).
4653 assert(Op.getValueType() == MemVT && "Mismatched VTs");
4654 assert(Subtarget.hasInterlockedAccess1() &&
4655 "Should have been expanded by AtomicExpand pass.");
4656 SDValue Src2 = Node->getVal();
4657 SDLoc DL(Src2);
4658 SDValue NegSrc2 =
4659 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
4660 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
4661 Node->getChain(), Node->getBasePtr(), NegSrc2,
4662 Node->getMemOperand());
4663 }
4664
4665 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
4666}
4667
4668// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
4669SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
4670 SelectionDAG &DAG) const {
4671 auto *Node = cast<AtomicSDNode>(Op.getNode());
4672 SDValue ChainIn = Node->getOperand(0);
4673 SDValue Addr = Node->getOperand(1);
4674 SDValue CmpVal = Node->getOperand(2);
4675 SDValue SwapVal = Node->getOperand(3);
4676 MachineMemOperand *MMO = Node->getMemOperand();
4677 SDLoc DL(Node);
4678
4679 if (Node->getMemoryVT() == MVT::i128) {
4680 // Use same code to handle both legal and non-legal i128 types.
4683 return DAG.getMergeValues(Results, DL);
4684 }
4685
4686 // We have native support for 32-bit and 64-bit compare and swap, but we
4687 // still need to expand extracting the "success" result from the CC.
4688 EVT NarrowVT = Node->getMemoryVT();
4689 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4690 if (NarrowVT == WideVT) {
4691 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4692 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
4693 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
4694 DL, Tys, Ops, NarrowVT, MMO);
4695 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4696 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
4697
4698 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4699 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4700 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4701 return SDValue();
4702 }
4703
4704 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4705 // via a fullword ATOMIC_CMP_SWAPW operation.
4706 int64_t BitSize = NarrowVT.getSizeInBits();
4707
4708 SDValue AlignedAddr, BitShift, NegBitShift;
4709 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4710
4711 // Construct the ATOMIC_CMP_SWAPW node.
4712 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4713 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
4714 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
4715 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
4716 VTList, Ops, NarrowVT, MMO);
4717 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4718 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
4719
4720 // emitAtomicCmpSwapW() will zero extend the result (original value).
4721 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
4722 DAG.getValueType(NarrowVT));
4723 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
4724 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4725 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4726 return SDValue();
4727}
4728
4729 MachineMemOperand::Flags
4730 SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
4731 // Because of how we convert atomic_load and atomic_store to normal loads and
4732 // stores in the DAG, we need to ensure that the MMOs are marked volatile
4733 // since DAGCombine hasn't been updated to account for atomic, but non
4734 // volatile loads. (See D57601)
4735 if (auto *SI = dyn_cast<StoreInst>(&I))
4736 if (SI->isAtomic())
4737 return MachineMemOperand::MOVolatile;
4738 if (auto *LI = dyn_cast<LoadInst>(&I))
4739 if (LI->isAtomic())
4740 return MachineMemOperand::MOVolatile;
4741 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
4742 if (AI->isAtomic())
4743 return MachineMemOperand::MOVolatile;
4744 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
4745 if (AI->isAtomic())
4746 return MachineMemOperand::MOVolatile;
4747 return MachineMemOperand::MONone;
4748}
4749
4750SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
4751 SelectionDAG &DAG) const {
4752 MachineFunction &MF = DAG.getMachineFunction();
4753 auto *Regs = Subtarget.getSpecialRegisters();
4754 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4755 report_fatal_error("Variable-sized stack allocations are not supported "
4756 "in GHC calling convention");
4757 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
4758 Regs->getStackPointerRegister(), Op.getValueType());
4759}
4760
4761SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
4762 SelectionDAG &DAG) const {
4763 MachineFunction &MF = DAG.getMachineFunction();
4764 auto *Regs = Subtarget.getSpecialRegisters();
4765 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4766
4767 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4768 report_fatal_error("Variable-sized stack allocations are not supported "
4769 "in GHC calling convention");
4770
4771 SDValue Chain = Op.getOperand(0);
4772 SDValue NewSP = Op.getOperand(1);
4773 SDValue Backchain;
4774 SDLoc DL(Op);
4775
4776 if (StoreBackchain) {
4777 SDValue OldSP = DAG.getCopyFromReg(
4778 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
4779 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4780 MachinePointerInfo());
4781 }
4782
4783 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
4784
4785 if (StoreBackchain)
4786 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4787 MachinePointerInfo());
4788
4789 return Chain;
4790}
4791
4792SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
4793 SelectionDAG &DAG) const {
4794 bool IsData = Op.getConstantOperandVal(4);
4795 if (!IsData)
4796 // Just preserve the chain.
4797 return Op.getOperand(0);
4798
4799 SDLoc DL(Op);
4800 bool IsWrite = Op.getConstantOperandVal(2);
4801 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
4802 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
4803 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
4804 Op.getOperand(1)};
4805 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
4806 Node->getVTList(), Ops,
4807 Node->getMemoryVT(), Node->getMemOperand());
4808}
4809
4810// Convert condition code in CCReg to an i32 value.
4811 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
4812 SDLoc DL(CCReg);
4813 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
4814 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
4815 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
4816}
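// Background note (from the ISA, stated here as an assumption): IPM places
// the two-bit condition code in bits 29:28 of the 32-bit result, so the SRL
// by SystemZ::IPM_CC above leaves the CC as a plain integer in the range 0-3.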
4817
4818SDValue
4819SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4820 SelectionDAG &DAG) const {
4821 unsigned Opcode, CCValid;
4822 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
4823 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
4824 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
4825 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
4826 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
4827 return SDValue();
4828 }
4829
4830 return SDValue();
4831}
4832
4833SDValue
4834SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4835 SelectionDAG &DAG) const {
4836 unsigned Opcode, CCValid;
4837 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
4838 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
4839 if (Op->getNumValues() == 1)
4840 return getCCResult(DAG, SDValue(Node, 0));
4841 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
4842 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
4843 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
4844 }
4845
4846 unsigned Id = Op.getConstantOperandVal(0);
4847 switch (Id) {
4848 case Intrinsic::thread_pointer:
4849 return lowerThreadPointer(SDLoc(Op), DAG);
4850
4851 case Intrinsic::s390_vpdi:
4852 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
4853 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4854
4855 case Intrinsic::s390_vperm:
4856 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
4857 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4858
4859 case Intrinsic::s390_vuphb:
4860 case Intrinsic::s390_vuphh:
4861 case Intrinsic::s390_vuphf:
4862 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
4863 Op.getOperand(1));
4864
4865 case Intrinsic::s390_vuplhb:
4866 case Intrinsic::s390_vuplhh:
4867 case Intrinsic::s390_vuplhf:
4868 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
4869 Op.getOperand(1));
4870
4871 case Intrinsic::s390_vuplb:
4872 case Intrinsic::s390_vuplhw:
4873 case Intrinsic::s390_vuplf:
4874 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
4875 Op.getOperand(1));
4876
4877 case Intrinsic::s390_vupllb:
4878 case Intrinsic::s390_vupllh:
4879 case Intrinsic::s390_vupllf:
4880 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
4881 Op.getOperand(1));
4882
4883 case Intrinsic::s390_vsumb:
4884 case Intrinsic::s390_vsumh:
4885 case Intrinsic::s390_vsumgh:
4886 case Intrinsic::s390_vsumgf:
4887 case Intrinsic::s390_vsumqf:
4888 case Intrinsic::s390_vsumqg:
4889 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
4890 Op.getOperand(1), Op.getOperand(2));
4891
4892 case Intrinsic::s390_vaq:
4893 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
4894 Op.getOperand(1), Op.getOperand(2));
4895 case Intrinsic::s390_vaccb:
4896 case Intrinsic::s390_vacch:
4897 case Intrinsic::s390_vaccf:
4898 case Intrinsic::s390_vaccg:
4899 case Intrinsic::s390_vaccq:
4900 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
4901 Op.getOperand(1), Op.getOperand(2));
4902 case Intrinsic::s390_vacq:
4903 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
4904 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4905 case Intrinsic::s390_vacccq:
4906 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
4907 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4908
4909 case Intrinsic::s390_vsq:
4910 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
4911 Op.getOperand(1), Op.getOperand(2));
4912 case Intrinsic::s390_vscbib:
4913 case Intrinsic::s390_vscbih:
4914 case Intrinsic::s390_vscbif:
4915 case Intrinsic::s390_vscbig:
4916 case Intrinsic::s390_vscbiq:
4917 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
4918 Op.getOperand(1), Op.getOperand(2));
4919 case Intrinsic::s390_vsbiq:
4920 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
4921 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4922 case Intrinsic::s390_vsbcbiq:
4923 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
4924 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4925 }
4926
4927 return SDValue();
4928}
4929
4930namespace {
4931// Says that SystemZISD operation Opcode can be used to perform the equivalent
4932// of a VPERM with permute vector Bytes. If Opcode takes three operands,
4933// Operand is the constant third operand, otherwise it is the number of
4934// bytes in each element of the result.
4935struct Permute {
4936 unsigned Opcode;
4937 unsigned Operand;
4938 unsigned char Bytes[SystemZ::VectorBytes];
4939};
4940}
4941
4942static const Permute PermuteForms[] = {
4943 // VMRHG
4944 { SystemZISD::MERGE_HIGH, 8,
4945 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4946 // VMRHF
4947 { SystemZISD::MERGE_HIGH, 4,
4948 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4949 // VMRHH
4950 { SystemZISD::MERGE_HIGH, 2,
4951 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4952 // VMRHB
4953 { SystemZISD::MERGE_HIGH, 1,
4954 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4955 // VMRLG
4956 { SystemZISD::MERGE_LOW, 8,
4957 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4958 // VMRLF
4959 { SystemZISD::MERGE_LOW, 4,
4960 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4961 // VMRLH
4962 { SystemZISD::MERGE_LOW, 2,
4963 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4964 // VMRLB
4965 { SystemZISD::MERGE_LOW, 1,
4966 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4967 // VPKG
4968 { SystemZISD::PACK, 4,
4969 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4970 // VPKF
4971 { SystemZISD::PACK, 2,
4972 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4973 // VPKH
4974 { SystemZISD::PACK, 1,
4975 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4976 // VPDI V1, V2, 4 (low half of V1, high half of V2)
4977 { SystemZISD::PERMUTE_DWORDS, 4,
4978 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4979 // VPDI V1, V2, 1 (high half of V1, low half of V2)
4980 { SystemZISD::PERMUTE_DWORDS, 1,
4981 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4982};
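// Hedged example of how this table is used: a v2i64 shuffle with mask <0, 2>
// selects bytes 0-7 of the first operand followed by bytes 16-23 of the
// concatenated pair, which is exactly the first (VMRHG) entry above, so the
// shuffle can be emitted as a single merge-high of doublewords.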
4983
4984// Called after matching a vector shuffle against a particular pattern.
4985// Both the original shuffle and the pattern have two vector operands.
4986// OpNos[0] is the operand of the original shuffle that should be used for
4987// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4988// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
4989// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4990// for operands 0 and 1 of the pattern.
4991static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
4992 if (OpNos[0] < 0) {
4993 if (OpNos[1] < 0)
4994 return false;
4995 OpNo0 = OpNo1 = OpNos[1];
4996 } else if (OpNos[1] < 0) {
4997 OpNo0 = OpNo1 = OpNos[0];
4998 } else {
4999 OpNo0 = OpNos[0];
5000 OpNo1 = OpNos[1];
5001 }
5002 return true;
5003}
5004
5005// Bytes is a VPERM-like permute vector, except that -1 is used for
5006// undefined bytes. Return true if the VPERM can be implemented using P.
5007// When returning true set OpNo0 to the VPERM operand that should be
5008// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5009//
5010// For example, if swapping the VPERM operands allows P to match, OpNo0
5011// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5012// operand, but rewriting it to use two duplicated operands allows it to
5013// match P, then OpNo0 and OpNo1 will be the same.
5014static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5015 unsigned &OpNo0, unsigned &OpNo1) {
5016 int OpNos[] = { -1, -1 };
5017 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5018 int Elt = Bytes[I];
5019 if (Elt >= 0) {
5020 // Make sure that the two permute vectors use the same suboperand
5021 // byte number. Only the operand numbers (the high bits) are
5022 // allowed to differ.
5023 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5024 return false;
5025 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5026 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5027 // Make sure that the operand mappings are consistent with previous
5028 // elements.
5029 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5030 return false;
5031 OpNos[ModelOpNo] = RealOpNo;
5032 }
5033 }
5034 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5035}
5036
5037// As above, but search for a matching permute.
5038static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5039 unsigned &OpNo0, unsigned &OpNo1) {
5040 for (auto &P : PermuteForms)
5041 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5042 return &P;
5043 return nullptr;
5044}
5045
5046// Bytes is a VPERM-like permute vector, except that -1 is used for
5047// undefined bytes. This permute is an operand of an outer permute.
5048// See whether redistributing the -1 bytes gives a shuffle that can be
5049// implemented using P. If so, set Transform to a VPERM-like permute vector
5050// that, when applied to the result of P, gives the original permute in Bytes.
5051 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5052 const Permute &P,
5053 SmallVectorImpl<int> &Transform) {
5054 unsigned To = 0;
5055 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5056 int Elt = Bytes[From];
5057 if (Elt < 0)
5058 // Byte number From of the result is undefined.
5059 Transform[From] = -1;
5060 else {
5061 while (P.Bytes[To] != Elt) {
5062 To += 1;
5063 if (To == SystemZ::VectorBytes)
5064 return false;
5065 }
5066 Transform[From] = To;
5067 }
5068 }
5069 return true;
5070}
5071
5072// As above, but search for a matching permute.
5073static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5074 SmallVectorImpl<int> &Transform) {
5075 for (auto &P : PermuteForms)
5076 if (matchDoublePermute(Bytes, P, Transform))
5077 return &P;
5078 return nullptr;
5079}
5080
5081// Convert the mask of the given shuffle op into a byte-level mask,
5082// as if it had type vNi8.
5083static bool getVPermMask(SDValue ShuffleOp,
5084 SmallVectorImpl<int> &Bytes) {
5085 EVT VT = ShuffleOp.getValueType();
5086 unsigned NumElements = VT.getVectorNumElements();
5087 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5088
5089 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5090 Bytes.resize(NumElements * BytesPerElement, -1);
5091 for (unsigned I = 0; I < NumElements; ++I) {
5092 int Index = VSN->getMaskElt(I);
5093 if (Index >= 0)
5094 for (unsigned J = 0; J < BytesPerElement; ++J)
5095 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5096 }
5097 return true;
5098 }
5099 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5100 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5101 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5102 Bytes.resize(NumElements * BytesPerElement, -1);
5103 for (unsigned I = 0; I < NumElements; ++I)
5104 for (unsigned J = 0; J < BytesPerElement; ++J)
5105 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5106 return true;
5107 }
5108 return false;
5109}
5110
5111// Bytes is a VPERM-like permute vector, except that -1 is used for
5112// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5113// the result come from a contiguous sequence of bytes from one input.
5114// Set Base to the selector for the first byte if so.
5115static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5116 unsigned BytesPerElement, int &Base) {
5117 Base = -1;
5118 for (unsigned I = 0; I < BytesPerElement; ++I) {
5119 if (Bytes[Start + I] >= 0) {
5120 unsigned Elem = Bytes[Start + I];
5121 if (Base < 0) {
5122 Base = Elem - I;
5123 // Make sure the bytes would come from one input operand.
5124 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5125 return false;
5126 } else if (unsigned(Base) != Elem - I)
5127 return false;
5128 }
5129 }
5130 return true;
5131}
5132
5133// Bytes is a VPERM-like permute vector, except that -1 is used for
5134// undefined bytes. Return true if it can be performed using VSLDB.
5135// When returning true, set StartIndex to the shift amount and OpNo0
5136// and OpNo1 to the VPERM operands that should be used as the first
5137// and second shift operand respectively.
5138 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5139 unsigned &StartIndex, unsigned &OpNo0,
5140 unsigned &OpNo1) {
5141 int OpNos[] = { -1, -1 };
5142 int Shift = -1;
5143 for (unsigned I = 0; I < 16; ++I) {
5144 int Index = Bytes[I];
5145 if (Index >= 0) {
5146 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5147 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5148 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5149 if (Shift < 0)
5150 Shift = ExpectedShift;
5151 else if (Shift != ExpectedShift)
5152 return false;
5153 // Make sure that the operand mappings are consistent with previous
5154 // elements.
5155 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5156 return false;
5157 OpNos[ModelOpNo] = RealOpNo;
5158 }
5159 }
5160 StartIndex = Shift;
5161 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5162}
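// Editorial sketch (not part of the original source): the vector
// Bytes = { 4,5,6,...,19 } matches a double-vector shift left by 4 bytes.
// Every entry satisfies (Index - I) % 16 == 4, so Shift settles at 4;
// bytes 4..15 map to the first operand and 16..19 to the second, which is
// exactly the VSLDB form with StartIndex = 4.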
5163
5164// Create a node that performs P on operands Op0 and Op1, casting the
5165// operands to the appropriate type. The type of the result is determined by P.
5166static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5167 const Permute &P, SDValue Op0, SDValue Op1) {
5168 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5169 // elements of a PACK are twice as wide as the outputs.
5170 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5171 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5172 P.Operand);
5173 // Cast both operands to the appropriate type.
5174 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5175 SystemZ::VectorBytes / InBytes);
5176 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5177 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5178 SDValue Op;
5179 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5180 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5181 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5182 } else if (P.Opcode == SystemZISD::PACK) {
5183 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5184 SystemZ::VectorBytes / P.Operand);
5185 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5186 } else {
5187 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5188 }
5189 return Op;
5190}
5191
5192static bool isZeroVector(SDValue N) {
5193 if (N->getOpcode() == ISD::BITCAST)
5194 N = N->getOperand(0);
5195 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5196 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5197 return Op->getZExtValue() == 0;
5198 return ISD::isBuildVectorAllZeros(N.getNode());
5199}
5200
5201// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5202static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5203 for (unsigned I = 0; I < Num ; I++)
5204 if (isZeroVector(Ops[I]))
5205 return I;
5206 return UINT32_MAX;
5207}
5208
5209// Bytes is a VPERM-like permute vector, except that -1 is used for
5210// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5211// VSLDB or VPERM.
5212static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5213 SDValue *Ops,
5214 const SmallVectorImpl<int> &Bytes) {
5215 for (unsigned I = 0; I < 2; ++I)
5216 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5217
5218 // First see whether VSLDB can be used.
5219 unsigned StartIndex, OpNo0, OpNo1;
5220 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5221 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5222 Ops[OpNo1],
5223 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5224
5225 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5226 // eliminate a zero vector by reusing any zero index in the permute vector.
5227 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5228 if (ZeroVecIdx != UINT32_MAX) {
5229 bool MaskFirst = true;
5230 int ZeroIdx = -1;
5231 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5232 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5233 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5234 if (OpNo == ZeroVecIdx && I == 0) {
5235 // If the first byte is zero, use mask as first operand.
5236 ZeroIdx = 0;
5237 break;
5238 }
5239 if (OpNo != ZeroVecIdx && Byte == 0) {
5240 // If mask contains a zero, use it by placing that vector first.
5241 ZeroIdx = I + SystemZ::VectorBytes;
5242 MaskFirst = false;
5243 break;
5244 }
5245 }
5246 if (ZeroIdx != -1) {
5247 SDValue IndexNodes[SystemZ::VectorBytes];
5248 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5249 if (Bytes[I] >= 0) {
5250 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5251 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5252 if (OpNo == ZeroVecIdx)
5253 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5254 else {
5255 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5256 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5257 }
5258 } else
5259 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5260 }
5261 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5262 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5263 if (MaskFirst)
5264 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5265 Mask);
5266 else
5267 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5268 Mask);
5269 }
5270 }
5271
5272 SDValue IndexNodes[SystemZ::VectorBytes];
5273 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5274 if (Bytes[I] >= 0)
5275 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5276 else
5277 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5278 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5279 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5280 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5281}
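// Editorial sketch (not part of the original source): if Ops[1] is a zero
// vector and Bytes = { 16,0,16,1, ... }, the zero-reuse path above builds a
// permute vector whose first byte is the constant 0 and emits a single
// VPERM that reads the zero bytes from the permute vector itself, so the
// separate zero register operand is dropped entirely.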
5282
5283namespace {
5284// Describes a general N-operand vector shuffle.
5285struct GeneralShuffle {
5286 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
5287 void addUndef();
5288 bool add(SDValue, unsigned);
5289 SDValue getNode(SelectionDAG &, const SDLoc &);
5290 void tryPrepareForUnpack();
5291 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5292 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5293
5294 // The operands of the shuffle.
5295 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5296
5297 // Index I is -1 if byte I of the result is undefined. Otherwise the
5298 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5299 // Bytes[I] / SystemZ::VectorBytes.
5300 SmallVector<int, SystemZ::VectorBytes> Bytes;
5301
5302 // The type of the shuffle result.
5303 EVT VT;
5304
5305 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5306 unsigned UnpackFromEltSize;
5307};
5308}
5309
5310// Add an extra undefined element to the shuffle.
5311void GeneralShuffle::addUndef() {
5312 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5313 for (unsigned I = 0; I < BytesPerElement; ++I)
5314 Bytes.push_back(-1);
5315}
5316
5317// Add an extra element to the shuffle, taking it from element Elem of Op.
5318// A null Op indicates a vector input whose value will be calculated later;
5319// there is at most one such input per shuffle and it always has the same
5320// type as the result. Aborts and returns false if the source vector elements
5321// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
5322// LLVM they become implicitly extended, but this is rare and not optimized.
5323bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5324 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5325
5326 // The source vector can have wider elements than the result,
5327 // either through an explicit TRUNCATE or because of type legalization.
5328 // We want the least significant part.
5329 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5330 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5331
5332 // Return false if the source elements are smaller than their destination
5333 // elements.
5334 if (FromBytesPerElement < BytesPerElement)
5335 return false;
5336
5337 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5338 (FromBytesPerElement - BytesPerElement));
5339
5340 // Look through things like shuffles and bitcasts.
5341 while (Op.getNode()) {
5342 if (Op.getOpcode() == ISD::BITCAST)
5343 Op = Op.getOperand(0);
5344 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5345 // See whether the bytes we need come from a contiguous part of one
5346 // operand.
5347 SmallVector<int, SystemZ::VectorBytes> OpBytes;
5348 if (!getVPermMask(Op, OpBytes))
5349 break;
5350 int NewByte;
5351 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5352 break;
5353 if (NewByte < 0) {
5354 addUndef();
5355 return true;
5356 }
5357 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
5358 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
5359 } else if (Op.isUndef()) {
5360 addUndef();
5361 return true;
5362 } else
5363 break;
5364 }
5365
5366 // Make sure that the source of the extraction is in Ops.
5367 unsigned OpNo = 0;
5368 for (; OpNo < Ops.size(); ++OpNo)
5369 if (Ops[OpNo] == Op)
5370 break;
5371 if (OpNo == Ops.size())
5372 Ops.push_back(Op);
5373
5374 // Add the element to Bytes.
5375 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
5376 for (unsigned I = 0; I < BytesPerElement; ++I)
5377 Bytes.push_back(Base + I);
5378
5379 return true;
5380}
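// Editorial sketch (not part of the original source): after adding element
// 3 of a v4i32 source that ends up as Ops[1], the four entries appended to
// Bytes are { 28, 29, 30, 31 }, i.e. OpNo * SystemZ::VectorBytes (1 * 16)
// plus the element's byte offset within that operand (3 * 4) for each of
// the element's four bytes.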
5381
5382// Return SDNodes for the completed shuffle.
5383SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
5384 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
5385
5386 if (Ops.size() == 0)
5387 return DAG.getUNDEF(VT);
5388
5389 // Use a single unpack if possible as the last operation.
5390 tryPrepareForUnpack();
5391
5392 // Make sure that there are at least two shuffle operands.
5393 if (Ops.size() == 1)
5394 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
5395
5396 // Create a tree of shuffles, deferring root node until after the loop.
5397 // Try to redistribute the undefined elements of non-root nodes so that
5398 // the non-root shuffles match something like a pack or merge, then adjust
5399 // the parent node's permute vector to compensate for the new order.
5400 // Among other things, this copes with vectors like <2 x i16> that were
5401 // padded with undefined elements during type legalization.
5402 //
5403 // In the best case this redistribution will lead to the whole tree
5404 // using packs and merges. It should rarely be a loss in other cases.
5405 unsigned Stride = 1;
5406 for (; Stride * 2 < Ops.size(); Stride *= 2) {
5407 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
5408 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
5409
5410 // Create a mask for just these two operands.
5411 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
5412 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5413 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
5414 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
5415 if (OpNo == I)
5416 NewBytes[J] = Byte;
5417 else if (OpNo == I + Stride)
5418 NewBytes[J] = SystemZ::VectorBytes + Byte;
5419 else
5420 NewBytes[J] = -1;
5421 }
5422 // See if it would be better to reorganize NewBytes to avoid using VPERM.
5423 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
5424 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
5425 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
5426 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
5427 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5428 if (NewBytes[J] >= 0) {
5429 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
5430 "Invalid double permute");
5431 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
5432 } else
5433 assert(NewBytesMap[J] < 0 && "Invalid double permute");
5434 }
5435 } else {
5436 // Just use NewBytes on the operands.
5437 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
5438 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
5439 if (NewBytes[J] >= 0)
5440 Bytes[J] = I * SystemZ::VectorBytes + J;
5441 }
5442 }
5443 }
5444
5445 // Now we just have 2 inputs. Put the second operand in Ops[1].
5446 if (Stride > 1) {
5447 Ops[1] = Ops[Stride];
5448 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5449 if (Bytes[I] >= int(SystemZ::VectorBytes))
5450 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
5451 }
5452
5453 // Look for an instruction that can do the permute without resorting
5454 // to VPERM.
5455 unsigned OpNo0, OpNo1;
5456 SDValue Op;
5457 if (unpackWasPrepared() && Ops[1].isUndef())
5458 Op = Ops[0];
5459 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
5460 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
5461 else
5462 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
5463
5464 Op = insertUnpackIfPrepared(DAG, DL, Op);
5465
5466 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5467}
5468
5469#ifndef NDEBUG
5470static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
5471 dbgs() << Msg.c_str() << " { ";
5472 for (unsigned i = 0; i < Bytes.size(); i++)
5473 dbgs() << Bytes[i] << " ";
5474 dbgs() << "}\n";
5475}
5476#endif
5477
5478// If the Bytes vector matches an unpack operation, prepare to do the unpack
5479// after all else by removing the zero vector and the effect of the unpack on
5480// Bytes.
5481void GeneralShuffle::tryPrepareForUnpack() {
5482 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
5483 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
5484 return;
5485
5486 // Only do this if removing the zero vector reduces the depth, otherwise
5487 // the critical path will increase with the final unpack.
5488 if (Ops.size() > 2 &&
5489 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
5490 return;
5491
5492 // Find an unpack that would allow removing the zero vector from Ops.
5493 UnpackFromEltSize = 1;
5494 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
5495 bool MatchUnpack = true;
5496 SmallVector<int, 16> SrcBytes;
5497 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
5498 unsigned ToEltSize = UnpackFromEltSize * 2;
5499 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
5500 if (!IsZextByte)
5501 SrcBytes.push_back(Bytes[Elt]);
5502 if (Bytes[Elt] != -1) {
5503 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
5504 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
5505 MatchUnpack = false;
5506 break;
5507 }
5508 }
5509 }
5510 if (MatchUnpack) {
5511 if (Ops.size() == 2) {
5512 // Don't use unpack if a single source operand needs rearrangement.
5513 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
5514 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
5515 UnpackFromEltSize = UINT_MAX;
5516 return;
5517 }
5518 }
5519 break;
5520 }
5521 }
5522 if (UnpackFromEltSize > 4)
5523 return;
5524
5525 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
5526 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
5527 << ".\n";
5528 dumpBytes(Bytes, "Original Bytes vector:"););
5529
5530 // Apply the unpack in reverse to the Bytes array.
5531 unsigned B = 0;
5532 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
5533 Elt += UnpackFromEltSize;
5534 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
5535 Bytes[B] = Bytes[Elt];
5536 }
5537 while (B < SystemZ::VectorBytes)
5538 Bytes[B++] = -1;
5539
5540 // Remove the zero vector from Ops
5541 Ops.erase(&Ops[ZeroVecOpNo]);
5542 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5543 if (Bytes[I] >= 0) {
5544 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5545 if (OpNo > ZeroVecOpNo)
5546 Bytes[I] -= SystemZ::VectorBytes;
5547 }
5548
5549 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
5550 dbgs() << "\n";);
5551}
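// Editorial sketch (not part of the original source): if Bytes described a
// zero extension of 8 source bytes, e.g. { Z,s0, Z,s1, ..., Z,s7 } with the
// Z bytes taken from the zero vector, UnpackFromEltSize becomes 1 and the
// reverse transformation above compacts Bytes to { s0, ..., s7, -1, ..., -1 },
// leaving the widening itself to the final unpack inserted by
// insertUnpackIfPrepared().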
5552
5553SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
5554 const SDLoc &DL,
5555 SDValue Op) {
5556 if (!unpackWasPrepared())
5557 return Op;
5558 unsigned InBits = UnpackFromEltSize * 8;
5559 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
5560 SystemZ::VectorBits / InBits);
5561 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
5562 unsigned OutBits = InBits * 2;
5563 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
5564 SystemZ::VectorBits / OutBits);
5565 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
5566}
5567
5568// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
5569static bool isScalarToVector(SDValue Op) {
5570 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
5571 if (!Op.getOperand(I).isUndef())
5572 return false;
5573 return true;
5574}
5575
5576// Return a vector of type VT that contains Value in the first element.
5577// The other elements don't matter.
5578static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5579 SDValue Value) {
5580 // If we have a constant, replicate it to all elements and let the
5581 // BUILD_VECTOR lowering take care of it.
5582 if (Value.getOpcode() == ISD::Constant ||
5583 Value.getOpcode() == ISD::ConstantFP) {
5584 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
5585 return DAG.getBuildVector(VT, DL, Ops);
5586 }
5587 if (Value.isUndef())
5588 return DAG.getUNDEF(VT);
5589 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
5590}
5591
5592// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
5593// element 1. Used for cases in which replication is cheap.
5594static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5595 SDValue Op0, SDValue Op1) {
5596 if (Op0.isUndef()) {
5597 if (Op1.isUndef())
5598 return DAG.getUNDEF(VT);
5599 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
5600 }
5601 if (Op1.isUndef())
5602 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
5603 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
5604 buildScalarToVector(DAG, DL, VT, Op0),
5605 buildScalarToVector(DAG, DL, VT, Op1));
5606}
5607
5608// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
5609// vector for them.
5610static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
5611 SDValue Op1) {
5612 if (Op0.isUndef() && Op1.isUndef())
5613 return DAG.getUNDEF(MVT::v2i64);
5614 // If one of the two inputs is undefined then replicate the other one,
5615 // in order to avoid using another register unnecessarily.
5616 if (Op0.isUndef())
5617 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5618 else if (Op1.isUndef())
5619 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5620 else {
5621 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5622 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5623 }
5624 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
5625}
5626
5627// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
5628// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
5629// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
5630// would benefit from this representation and return it if so.
5631static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
5632 BuildVectorSDNode *BVN) {
5633 EVT VT = BVN->getValueType(0);
5634 unsigned NumElements = VT.getVectorNumElements();
5635
5636 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
5637 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
5638 // need a BUILD_VECTOR, add an additional placeholder operand for that
5639 // BUILD_VECTOR and store its operands in ResidueOps.
5640 GeneralShuffle GS(VT);
5641 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
5642 bool FoundOne = false;
5643 for (unsigned I = 0; I < NumElements; ++I) {
5644 SDValue Op = BVN->getOperand(I);
5645 if (Op.getOpcode() == ISD::TRUNCATE)
5646 Op = Op.getOperand(0);
5647 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5648 Op.getOperand(1).getOpcode() == ISD::Constant) {
5649 unsigned Elem = Op.getConstantOperandVal(1);
5650 if (!GS.add(Op.getOperand(0), Elem))
5651 return SDValue();
5652 FoundOne = true;
5653 } else if (Op.isUndef()) {
5654 GS.addUndef();
5655 } else {
5656 if (!GS.add(SDValue(), ResidueOps.size()))
5657 return SDValue();
5658 ResidueOps.push_back(BVN->getOperand(I));
5659 }
5660 }
5661
5662 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
5663 if (!FoundOne)
5664 return SDValue();
5665
5666 // Create the BUILD_VECTOR for the remaining elements, if any.
5667 if (!ResidueOps.empty()) {
5668 while (ResidueOps.size() < NumElements)
5669 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
5670 for (auto &Op : GS.Ops) {
5671 if (!Op.getNode()) {
5672 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
5673 break;
5674 }
5675 }
5676 }
5677 return GS.getNode(DAG, SDLoc(BVN));
5678}
5679
5680bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
5681 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
5682 return true;
5683 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
5684 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
5685 return true;
5686 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
5687 return true;
5688 return false;
5689}
5690
5691// Combine GPR scalar values Elems into a vector of type VT.
5692SDValue
5693SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5694 SmallVectorImpl<SDValue> &Elems) const {
5695 // See whether there is a single replicated value.
5696 SDValue Single;
5697 unsigned int NumElements = Elems.size();
5698 unsigned int Count = 0;
5699 for (auto Elem : Elems) {
5700 if (!Elem.isUndef()) {
5701 if (!Single.getNode())
5702 Single = Elem;
5703 else if (Elem != Single) {
5704 Single = SDValue();
5705 break;
5706 }
5707 Count += 1;
5708 }
5709 }
5710 // There are three cases here:
5711 //
5712 // - if the only defined element is a loaded one, the best sequence
5713 // is a replicating load.
5714 //
5715 // - otherwise, if the only defined element is an i64 value, we will
5716 // end up with the same VLVGP sequence regardless of whether we short-cut
5717 // for replication or fall through to the later code.
5718 //
5719 // - otherwise, if the only defined element is an i32 or smaller value,
5720 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
5721 // This is only a win if the single defined element is used more than once.
5722 // In other cases we're better off using a single VLVGx.
5723 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
5724 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
5725
5726 // If all elements are loads, use VLREP/VLEs (below).
5727 bool AllLoads = true;
5728 for (auto Elem : Elems)
5729 if (!isVectorElementLoad(Elem)) {
5730 AllLoads = false;
5731 break;
5732 }
5733
5734 // The best way of building a v2i64 from two i64s is to use VLVGP.
5735 if (VT == MVT::v2i64 && !AllLoads)
5736 return joinDwords(DAG, DL, Elems[0], Elems[1]);
5737
5738 // Use a 64-bit merge high to combine two doubles.
5739 if (VT == MVT::v2f64 && !AllLoads)
5740 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5741
5742 // Build v4f32 values directly from the FPRs:
5743 //
5744 // <Axxx> <Bxxx> <Cxxx> <Dxxx>
5745 // V V VMRHF
5746 // <ABxx> <CDxx>
5747 // V VMRHG
5748 // <ABCD>
5749 if (VT == MVT::v4f32 && !AllLoads) {
5750 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5751 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
5752 // Avoid unnecessary undefs by reusing the other operand.
5753 if (Op01.isUndef())
5754 Op01 = Op23;
5755 else if (Op23.isUndef())
5756 Op23 = Op01;
5757 // Merging identical replications is a no-op.
5758 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
5759 return Op01;
5760 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
5761 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
5762 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
5763 DL, MVT::v2i64, Op01, Op23);
5764 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5765 }
5766
5767 // Collect the constant terms.
5768 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
5769 SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
5770
5771 unsigned NumConstants = 0;
5772 for (unsigned I = 0; I < NumElements; ++I) {
5773 SDValue Elem = Elems[I];
5774 if (Elem.getOpcode() == ISD::Constant ||
5775 Elem.getOpcode() == ISD::ConstantFP) {
5776 NumConstants += 1;
5777 Constants[I] = Elem;
5778 Done[I] = true;
5779 }
5780 }
5781 // If there was at least one constant, fill in the other elements of
5782 // Constants with undefs to get a full vector constant and use that
5783 // as the starting point.
5784 SDValue Result;
5785 SDValue ReplicatedVal;
5786 if (NumConstants > 0) {
5787 for (unsigned I = 0; I < NumElements; ++I)
5788 if (!Constants[I].getNode())
5789 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
5790 Result = DAG.getBuildVector(VT, DL, Constants);
5791 } else {
5792 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
5793 // avoid a false dependency on any previous contents of the vector
5794 // register.
5795
5796 // Use a VLREP if at least one element is a load. Make sure to replicate
5797 // the load with the most elements having its value.
5798 std::map<const SDNode*, unsigned> UseCounts;
5799 SDNode *LoadMaxUses = nullptr;
5800 for (unsigned I = 0; I < NumElements; ++I)
5801 if (isVectorElementLoad(Elems[I])) {
5802 SDNode *Ld = Elems[I].getNode();
5803 UseCounts[Ld]++;
5804 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
5805 LoadMaxUses = Ld;
5806 }
5807 if (LoadMaxUses != nullptr) {
5808 ReplicatedVal = SDValue(LoadMaxUses, 0);
5809 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
5810 } else {
5811 // Try to use VLVGP.
5812 unsigned I1 = NumElements / 2 - 1;
5813 unsigned I2 = NumElements - 1;
5814 bool Def1 = !Elems[I1].isUndef();
5815 bool Def2 = !Elems[I2].isUndef();
5816 if (Def1 || Def2) {
5817 SDValue Elem1 = Elems[Def1 ? I1 : I2];
5818 SDValue Elem2 = Elems[Def2 ? I2 : I1];
5819 Result = DAG.getNode(ISD::BITCAST, DL, VT,
5820 joinDwords(DAG, DL, Elem1, Elem2));
5821 Done[I1] = true;
5822 Done[I2] = true;
5823 } else
5824 Result = DAG.getUNDEF(VT);
5825 }
5826 }
5827
5828 // Use VLVGx to insert the other elements.
5829 for (unsigned I = 0; I < NumElements; ++I)
5830 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
5831 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
5832 DAG.getConstant(I, DL, MVT::i32));
5833 return Result;
5834}
5835
5836SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
5837 SelectionDAG &DAG) const {
5838 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
5839 SDLoc DL(Op);
5840 EVT VT = Op.getValueType();
5841
5842 if (BVN->isConstant()) {
5843 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
5844 return Op;
5845
5846 // Fall back to loading it from memory.
5847 return SDValue();
5848 }
5849
5850 // See if we should use shuffles to construct the vector from other vectors.
5851 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
5852 return Res;
5853
5854 // Detect SCALAR_TO_VECTOR conversions.
5855 if (isScalarToVector(Op))
5856 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
5857
5858 // Otherwise use buildVector to build the vector up from GPRs.
5859 unsigned NumElements = Op.getNumOperands();
5860 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
5861 for (unsigned I = 0; I < NumElements; ++I)
5862 Ops[I] = Op.getOperand(I);
5863 return buildVector(DAG, DL, VT, Ops);
5864}
5865
5866SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
5867 SelectionDAG &DAG) const {
5868 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
5869 SDLoc DL(Op);
5870 EVT VT = Op.getValueType();
5871 unsigned NumElements = VT.getVectorNumElements();
5872
5873 if (VSN->isSplat()) {
5874 SDValue Op0 = Op.getOperand(0);
5875 unsigned Index = VSN->getSplatIndex();
5877 "Splat index should be defined and in first operand");
5878 // See whether the value we're splatting is directly available as a scalar.
5879 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5880 Op0.getOpcode() == ISD::BUILD_VECTOR)
5881 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
5882 // Otherwise keep it as a vector-to-vector operation.
5883 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
5884 DAG.getTargetConstant(Index, DL, MVT::i32));
5885 }
5886
5887 GeneralShuffle GS(VT);
5888 for (unsigned I = 0; I < NumElements; ++I) {
5889 int Elt = VSN->getMaskElt(I);
5890 if (Elt < 0)
5891 GS.addUndef();
5892 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
5893 unsigned(Elt) % NumElements))
5894 return SDValue();
5895 }
5896 return GS.getNode(DAG, SDLoc(VSN));
5897}
5898
5899SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
5900 SelectionDAG &DAG) const {
5901 SDLoc DL(Op);
5902 // Just insert the scalar into element 0 of an undefined vector.
5903 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
5904 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
5905 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
5906}
5907
5908SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
5909 SelectionDAG &DAG) const {
5910 // Handle insertions of floating-point values.
5911 SDLoc DL(Op);
5912 SDValue Op0 = Op.getOperand(0);
5913 SDValue Op1 = Op.getOperand(1);
5914 SDValue Op2 = Op.getOperand(2);
5915 EVT VT = Op.getValueType();
5916
5917 // Insertions into constant indices of a v2f64 can be done using VPDI.
5918 // However, if the inserted value is a bitcast or a constant then it's
5919 // better to use GPRs, as below.
5920 if (VT == MVT::v2f64 &&
5921 Op1.getOpcode() != ISD::BITCAST &&
5922 Op1.getOpcode() != ISD::ConstantFP &&
5923 Op2.getOpcode() == ISD::Constant) {
5924 uint64_t Index = Op2->getAsZExtVal();
5925 unsigned Mask = VT.getVectorNumElements() - 1;
5926 if (Index <= Mask)
5927 return Op;
5928 }
5929
5930 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
5931 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
5932 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
5933 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
5934 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
5935 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
5936 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5937}
5938
5939SDValue
5940SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
5941 SelectionDAG &DAG) const {
5942 // Handle extractions of floating-point values.
5943 SDLoc DL(Op);
5944 SDValue Op0 = Op.getOperand(0);
5945 SDValue Op1 = Op.getOperand(1);
5946 EVT VT = Op.getValueType();
5947 EVT VecVT = Op0.getValueType();
5948
5949 // Extractions of constant indices can be done directly.
5950 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
5951 uint64_t Index = CIndexN->getZExtValue();
5952 unsigned Mask = VecVT.getVectorNumElements() - 1;
5953 if (Index <= Mask)
5954 return Op;
5955 }
5956
5957 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
5958 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
5959 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
5960 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
5961 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
5962 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5963}
5964
5965SDValue SystemZTargetLowering::
5966lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5967 SDValue PackedOp = Op.getOperand(0);
5968 EVT OutVT = Op.getValueType();
5969 EVT InVT = PackedOp.getValueType();
5970 unsigned ToBits = OutVT.getScalarSizeInBits();
5971 unsigned FromBits = InVT.getScalarSizeInBits();
5972 do {
5973 FromBits *= 2;
5974 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
5975 SystemZ::VectorBits / FromBits);
5976 PackedOp =
5977 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
5978 } while (FromBits != ToBits);
5979 return PackedOp;
5980}
5981
5982// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
5983SDValue SystemZTargetLowering::
5984lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5985 SDValue PackedOp = Op.getOperand(0);
5986 SDLoc DL(Op);
5987 EVT OutVT = Op.getValueType();
5988 EVT InVT = PackedOp.getValueType();
5989 unsigned InNumElts = InVT.getVectorNumElements();
5990 unsigned OutNumElts = OutVT.getVectorNumElements();
5991 unsigned NumInPerOut = InNumElts / OutNumElts;
5992
5993 SDValue ZeroVec =
5994 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
5995
5996 SmallVector<int, 16> Mask(InNumElts);
5997 unsigned ZeroVecElt = InNumElts;
5998 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
5999 unsigned MaskElt = PackedElt * NumInPerOut;
6000 unsigned End = MaskElt + NumInPerOut - 1;
6001 for (; MaskElt < End; MaskElt++)
6002 Mask[MaskElt] = ZeroVecElt++;
6003 Mask[MaskElt] = PackedElt;
6004 }
6005 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6006 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6007}
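// Editorial sketch (not part of the original source): for a v16i8 source
// zero-extended in-register to v4i32, NumInPerOut is 4 and the loop above
// builds the shuffle mask
//   { 16,17,18,0, 19,20,21,1, 22,23,24,2, 25,26,27,3 }
// i.e. three bytes from the zero vector followed by one source byte per
// result element, matching the big-endian element layout.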
6008
6009SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6010 unsigned ByScalar) const {
6011 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6012 SDValue Op0 = Op.getOperand(0);
6013 SDValue Op1 = Op.getOperand(1);
6014 SDLoc DL(Op);
6015 EVT VT = Op.getValueType();
6016 unsigned ElemBitSize = VT.getScalarSizeInBits();
6017
6018 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6019 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6020 APInt SplatBits, SplatUndef;
6021 unsigned SplatBitSize;
6022 bool HasAnyUndefs;
6023 // Check for constant splats. Use ElemBitSize as the minimum element
6024 // width and reject splats that need wider elements.
6025 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6026 ElemBitSize, true) &&
6027 SplatBitSize == ElemBitSize) {
6028 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6029 DL, MVT::i32);
6030 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6031 }
6032 // Check for variable splats.
6033 BitVector UndefElements;
6034 SDValue Splat = BVN->getSplatValue(&UndefElements);
6035 if (Splat) {
6036 // Since i32 is the smallest legal type, we either need a no-op
6037 // or a truncation.
6038 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6039 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6040 }
6041 }
6042
6043 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6044 // and the shift amount is directly available in a GPR.
6045 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6046 if (VSN->isSplat()) {
6047 SDValue VSNOp0 = VSN->getOperand(0);
6048 unsigned Index = VSN->getSplatIndex();
6050 "Splat index should be defined and in first operand");
6051 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6052 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6053 // Since i32 is the smallest legal type, we either need a no-op
6054 // or a truncation.
6055 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6056 VSNOp0.getOperand(Index));
6057 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6058 }
6059 }
6060 }
6061
6062 // Otherwise just treat the current form as legal.
6063 return Op;
6064}
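// Editorial sketch (not part of the original source): a shift such as
// (shl <4 x i32> %x, <5, 5, 5, 5>) takes the constant-splat path above and
// becomes (VSHL_BY_SCALAR %x, 5), so instruction selection can use a single
// element shift by an immediate (e.g. VESLF) rather than a full
// vector-by-vector shift.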
6065
6066SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
6067 SelectionDAG &DAG) const {
6068 SDLoc DL(Op);
6069 MVT ResultVT = Op.getSimpleValueType();
6070 SDValue Arg = Op.getOperand(0);
6071 unsigned Check = Op.getConstantOperandVal(1);
6072
6073 unsigned TDCMask = 0;
6074 if (Check & fcSNan)
6075 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
6076 if (Check & fcQNan)
6077 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
6078 if (Check & fcPosInf)
6079 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
6080 if (Check & fcNegInf)
6081 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
6082 if (Check & fcPosNormal)
6083 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
6084 if (Check & fcNegNormal)
6085 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
6086 if (Check & fcPosSubnormal)
6087 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
6088 if (Check & fcNegSubnormal)
6089 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
6090 if (Check & fcPosZero)
6091 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
6092 if (Check & fcNegZero)
6093 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
6094 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
6095
6096 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
6097 return getCCResult(DAG, Intr);
6098}
6099
6100SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
6101 SelectionDAG &DAG) const {
6102 SDLoc DL(Op);
6103 SDValue Chain = Op.getOperand(0);
6104
6105 // STCKF only supports a memory operand, so we have to use a temporary.
6106 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
6107 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6108 MachinePointerInfo MPI =
6109 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
6110
6111 // Use STCKF to store the TOD clock into the temporary.
6112 SDValue StoreOps[] = {Chain, StackPtr};
6113 Chain = DAG.getMemIntrinsicNode(
6114 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
6115 MPI, MaybeAlign(), MachineMemOperand::MOStore);
6116
6117 // And read it back from there.
6118 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
6119}
6120
6120
6121SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
6122 SelectionDAG &DAG) const {
6123 switch (Op.getOpcode()) {
6124 case ISD::FRAMEADDR:
6125 return lowerFRAMEADDR(Op, DAG);
6126 case ISD::RETURNADDR:
6127 return lowerRETURNADDR(Op, DAG);
6128 case ISD::BR_CC:
6129 return lowerBR_CC(Op, DAG);
6130 case ISD::SELECT_CC:
6131 return lowerSELECT_CC(Op, DAG);
6132 case ISD::SETCC:
6133 return lowerSETCC(Op, DAG);
6134 case ISD::STRICT_FSETCC:
6135 return lowerSTRICT_FSETCC(Op, DAG, false);
6136 case ISD::STRICT_FSETCCS:
6137 return lowerSTRICT_FSETCC(Op, DAG, true);
6138 case ISD::GlobalAddress:
6139 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
6140 case ISD::GlobalTLSAddress:
6141 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
6142 case ISD::BlockAddress:
6143 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
6144 case ISD::JumpTable:
6145 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
6146 case ISD::ConstantPool:
6147 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
6148 case ISD::BITCAST:
6149 return lowerBITCAST(Op, DAG);
6150 case ISD::VASTART:
6151 return lowerVASTART(Op, DAG);
6152 case ISD::VACOPY:
6153 return lowerVACOPY(Op, DAG);
6154 case ISD::DYNAMIC_STACKALLOC:
6155 return lowerDYNAMIC_STACKALLOC(Op, DAG);
6156 case ISD::GET_DYNAMIC_AREA_OFFSET:
6157 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
6158 case ISD::SMUL_LOHI:
6159 return lowerSMUL_LOHI(Op, DAG);
6160 case ISD::UMUL_LOHI:
6161 return lowerUMUL_LOHI(Op, DAG);
6162 case ISD::SDIVREM:
6163 return lowerSDIVREM(Op, DAG);
6164 case ISD::UDIVREM:
6165 return lowerUDIVREM(Op, DAG);
6166 case ISD::SADDO:
6167 case ISD::SSUBO:
6168 case ISD::UADDO:
6169 case ISD::USUBO:
6170 return lowerXALUO(Op, DAG);
6171 case ISD::UADDO_CARRY:
6172 case ISD::USUBO_CARRY:
6173 return lowerUADDSUBO_CARRY(Op, DAG);
6174 case ISD::OR:
6175 return lowerOR(Op, DAG);
6176 case ISD::CTPOP:
6177 return lowerCTPOP(Op, DAG);
6178 case ISD::VECREDUCE_ADD:
6179 return lowerVECREDUCE_ADD(Op, DAG);
6180 case ISD::ATOMIC_FENCE:
6181 return lowerATOMIC_FENCE(Op, DAG);
6182 case ISD::ATOMIC_SWAP:
6183 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
6184 case ISD::ATOMIC_STORE:
6185 case ISD::ATOMIC_LOAD:
6186 return lowerATOMIC_LDST_I128(Op, DAG);
6187 case ISD::ATOMIC_LOAD_ADD:
6188 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
6189 case ISD::ATOMIC_LOAD_SUB:
6190 return lowerATOMIC_LOAD_SUB(Op, DAG);
6191 case ISD::ATOMIC_LOAD_AND:
6192 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
6193 case ISD::ATOMIC_LOAD_OR:
6194 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
6195 case ISD::ATOMIC_LOAD_XOR:
6196 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
6197 case ISD::ATOMIC_LOAD_NAND:
6198 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
6199 case ISD::ATOMIC_LOAD_MIN:
6200 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
6201 case ISD::ATOMIC_LOAD_MAX:
6202 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
6203 case ISD::ATOMIC_LOAD_UMIN:
6204 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
6205 case ISD::ATOMIC_LOAD_UMAX:
6206 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
6207 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
6208 return lowerATOMIC_CMP_SWAP(Op, DAG);
6209 case ISD::STACKSAVE:
6210 return lowerSTACKSAVE(Op, DAG);
6211 case ISD::STACKRESTORE:
6212 return lowerSTACKRESTORE(Op, DAG);
6213 case ISD::PREFETCH:
6214 return lowerPREFETCH(Op, DAG);
6215 case ISD::INTRINSIC_W_CHAIN:
6216 return lowerINTRINSIC_W_CHAIN(Op, DAG);
6217 case ISD::INTRINSIC_WO_CHAIN:
6218 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
6219 case ISD::BUILD_VECTOR:
6220 return lowerBUILD_VECTOR(Op, DAG);
6221 case ISD::VECTOR_SHUFFLE:
6222 return lowerVECTOR_SHUFFLE(Op, DAG);
6223 case ISD::SCALAR_TO_VECTOR:
6224 return lowerSCALAR_TO_VECTOR(Op, DAG);
6225 case ISD::INSERT_VECTOR_ELT:
6226 return lowerINSERT_VECTOR_ELT(Op, DAG);
6227 case ISD::EXTRACT_VECTOR_ELT:
6228 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6229 case ISD::SIGN_EXTEND_VECTOR_INREG:
6230 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
6231 case ISD::ZERO_EXTEND_VECTOR_INREG:
6232 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
6233 case ISD::SHL:
6234 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
6235 case ISD::SRL:
6236 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
6237 case ISD::SRA:
6238 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
6239 case ISD::ROTL:
6240 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
6241 case ISD::IS_FPCLASS:
6242 return lowerIS_FPCLASS(Op, DAG);
6243 case ISD::GET_ROUNDING:
6244 return lowerGET_ROUNDING(Op, DAG);
6245 case ISD::READCYCLECOUNTER:
6246 return lowerREADCYCLECOUNTER(Op, DAG);
6247 default:
6248 llvm_unreachable("Unexpected node to lower");
6249 }
6250}
6251
6252// Lower operations with invalid operand or result types (currently used
6253// only for 128-bit integer types).
6254void
6255SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
6256 SmallVectorImpl<SDValue> &Results,
6257 SelectionDAG &DAG) const {
6258 switch (N->getOpcode()) {
6259 case ISD::ATOMIC_LOAD: {
6260 SDLoc DL(N);
6261 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
6262 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
6263 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6264 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
6265 DL, Tys, Ops, MVT::i128, MMO);
6266 Results.push_back(lowerGR128ToI128(DAG, Res));
6267 Results.push_back(Res.getValue(1));
6268 break;
6269 }
6270 case ISD::ATOMIC_STORE: {
6271 SDLoc DL(N);
6272 SDVTList Tys = DAG.getVTList(MVT::Other);
6273 SDValue Ops[] = {N->getOperand(0), lowerI128ToGR128(DAG, N->getOperand(1)),
6274 N->getOperand(2)};
6275 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6276 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
6277 DL, Tys, Ops, MVT::i128, MMO);
6278 // We have to enforce sequential consistency by performing a
6279 // serialization operation after the store.
6280 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
6281 AtomicOrdering::SequentiallyConsistent)
6282 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
6283 MVT::Other, Res), 0);
6284 Results.push_back(Res);
6285 break;
6286 }
6287 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
6288 SDLoc DL(N);
6289 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
6290 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
6291 lowerI128ToGR128(DAG, N->getOperand(2)),
6292 lowerI128ToGR128(DAG, N->getOperand(3)) };
6293 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6294 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
6295 DL, Tys, Ops, MVT::i128, MMO);
6296 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
6297 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
6298 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
6299 Results.push_back(lowerGR128ToI128(DAG, Res));
6300 Results.push_back(Success);
6301 Results.push_back(Res.getValue(2));
6302 break;
6303 }
6304 case ISD::BITCAST: {
6305 SDValue Src = N->getOperand(0);
6306 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
6307 !useSoftFloat()) {
6308 SDLoc DL(N);
6309 SDValue Lo, Hi;
6310 if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) {
6311 SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src);
6312 Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
6313 DAG.getConstant(1, DL, MVT::i32));
6314 Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
6315 DAG.getConstant(0, DL, MVT::i32));
6316 } else {
6317 assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass &&
6318 "Unrecognized register class for f128.");
6319 SDValue LoFP = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
6320 DL, MVT::f64, Src);
6321 SDValue HiFP = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
6322 DL, MVT::f64, Src);
6323 Lo = DAG.getNode(ISD::BITCAST, DL, MVT::i64, LoFP);
6324 Hi = DAG.getNode(ISD::BITCAST, DL, MVT::i64, HiFP);
6325 }
6326 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi));
6327 }
6328 break;
6329 }
6330 default:
6331 llvm_unreachable("Unexpected node to lower");
6332 }
6333}
6334
6335void
6336SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
6337 SmallVectorImpl<SDValue> &Results,
6338 SelectionDAG &DAG) const {
6339 return LowerOperationWrapper(N, Results, DAG);
6340}
6341
6342const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
6343#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
6344 switch ((SystemZISD::NodeType)Opcode) {
6345 case SystemZISD::FIRST_NUMBER: break;
6346 OPCODE(RET_GLUE);
6347 OPCODE(CALL);
6348 OPCODE(SIBCALL);
6349 OPCODE(TLS_GDCALL);
6350 OPCODE(TLS_LDCALL);
6351 OPCODE(PCREL_WRAPPER);
6352 OPCODE(PCREL_OFFSET);
6353 OPCODE(ICMP);
6354 OPCODE(FCMP);
6355 OPCODE(STRICT_FCMP);
6356 OPCODE(STRICT_FCMPS);
6357 OPCODE(TM);
6358 OPCODE(BR_CCMASK);
6359 OPCODE(SELECT_CCMASK);
6360 OPCODE(ADJDYNALLOC);
6361 OPCODE(PROBED_ALLOCA);
6362 OPCODE(POPCNT);
6363 OPCODE(SMUL_LOHI);
6364 OPCODE(UMUL_LOHI);
6365 OPCODE(SDIVREM);
6366 OPCODE(UDIVREM);
6367 OPCODE(SADDO);
6368 OPCODE(SSUBO);
6369 OPCODE(UADDO);
6370 OPCODE(USUBO);
6371 OPCODE(ADDCARRY);
6372 OPCODE(SUBCARRY);
6373 OPCODE(GET_CCMASK);
6374 OPCODE(MVC);
6375 OPCODE(NC);
6376 OPCODE(OC);
6377 OPCODE(XC);
6378 OPCODE(CLC);
6379 OPCODE(MEMSET_MVC);
6380 OPCODE(STPCPY);
6381 OPCODE(STRCMP);
6382 OPCODE(SEARCH_STRING);
6383 OPCODE(IPM);
6384 OPCODE(TBEGIN);
6385 OPCODE(TBEGIN_NOFLOAT);
6386 OPCODE(TEND);
6387 OPCODE(BYTE_MASK);
6388 OPCODE(ROTATE_MASK);
6389 OPCODE(REPLICATE);
6390 OPCODE(JOIN_DWORDS);
6391 OPCODE(SPLAT);
6392 OPCODE(MERGE_HIGH);
6393 OPCODE(MERGE_LOW);
6394 OPCODE(SHL_DOUBLE);
6395 OPCODE(PERMUTE_DWORDS);
6396 OPCODE(PERMUTE);
6397 OPCODE(PACK);
6398 OPCODE(PACKS_CC);
6399 OPCODE(PACKLS_CC);
6400 OPCODE(UNPACK_HIGH);
6401 OPCODE(UNPACKL_HIGH);
6402 OPCODE(UNPACK_LOW);
6403 OPCODE(UNPACKL_LOW);
6404 OPCODE(VSHL_BY_SCALAR);
6405 OPCODE(VSRL_BY_SCALAR);
6406 OPCODE(VSRA_BY_SCALAR);
6407 OPCODE(VROTL_BY_SCALAR);
6408 OPCODE(VSUM);
6409 OPCODE(VACC);
6410 OPCODE(VSCBI);
6411 OPCODE(VAC);
6412 OPCODE(VSBI);
6413 OPCODE(VACCC);
6414 OPCODE(VSBCBI);
6415 OPCODE(VICMPE);
6416 OPCODE(VICMPH);
6417 OPCODE(VICMPHL);
6418 OPCODE(VICMPES);
6419 OPCODE(VICMPHS);
6420 OPCODE(VICMPHLS);
6421 OPCODE(VFCMPE);
6422 OPCODE(STRICT_VFCMPE);
6423 OPCODE(STRICT_VFCMPES);
6424 OPCODE(VFCMPH);
6425 OPCODE(STRICT_VFCMPH);
6426 OPCODE(STRICT_VFCMPHS);
6427 OPCODE(VFCMPHE);
6428 OPCODE(STRICT_VFCMPHE);
6429 OPCODE(STRICT_VFCMPHES);
6430 OPCODE(VFCMPES);
6431 OPCODE(VFCMPHS);
6432 OPCODE(VFCMPHES);
6433 OPCODE(VFTCI);
6434 OPCODE(VEXTEND);
6435 OPCODE(STRICT_VEXTEND);
6436 OPCODE(VROUND);
6437 OPCODE(STRICT_VROUND);
6438 OPCODE(VTM);
6439 OPCODE(SCMP128HI);
6440 OPCODE(UCMP128HI);
6441 OPCODE(VFAE_CC);
6442 OPCODE(VFAEZ_CC);
6443 OPCODE(VFEE_CC);
6444 OPCODE(VFEEZ_CC);
6445 OPCODE(VFENE_CC);
6446 OPCODE(VFENEZ_CC);
6447 OPCODE(VISTR_CC);
6448 OPCODE(VSTRC_CC);
6449 OPCODE(VSTRCZ_CC);
6450 OPCODE(VSTRS_CC);
6451 OPCODE(VSTRSZ_CC);
6452 OPCODE(TDC);
6453 OPCODE(ATOMIC_SWAPW);
6454 OPCODE(ATOMIC_LOADW_ADD);
6455 OPCODE(ATOMIC_LOADW_SUB);
6456 OPCODE(ATOMIC_LOADW_AND);
6457 OPCODE(ATOMIC_LOADW_OR);
6458 OPCODE(ATOMIC_LOADW_XOR);
6459 OPCODE(ATOMIC_LOADW_NAND);
6460 OPCODE(ATOMIC_LOADW_MIN);
6461 OPCODE(ATOMIC_LOADW_MAX);
6462 OPCODE(ATOMIC_LOADW_UMIN);
6463 OPCODE(ATOMIC_LOADW_UMAX);
6464 OPCODE(ATOMIC_CMP_SWAPW);
6465 OPCODE(ATOMIC_CMP_SWAP);
6466 OPCODE(ATOMIC_LOAD_128);
6467 OPCODE(ATOMIC_STORE_128);
6468 OPCODE(ATOMIC_CMP_SWAP_128);
6469 OPCODE(LRV);
6470 OPCODE(STRV);
6471 OPCODE(VLER);
6472 OPCODE(VSTER);
6473 OPCODE(STCKF);
6474 OPCODE(PREFETCH);
6475 OPCODE(ADA_ENTRY);
6476 }
6477 return nullptr;
6478#undef OPCODE
6479}
6480
6481// Return true if VT is a vector whose elements are a whole number of bytes
6482// in width. Also check for presence of vector support.
6483bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
6484 if (!Subtarget.hasVector())
6485 return false;
6486
6487 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
6488}
6489
6490// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
6491// producing a result of type ResVT. Op is a possibly bitcast version
6492// of the input vector and Index is the index (based on type VecVT) that
6493// should be extracted. Return the new extraction if a simplification
6494// was possible or if Force is true.
6495SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
6496 EVT VecVT, SDValue Op,
6497 unsigned Index,
6498 DAGCombinerInfo &DCI,
6499 bool Force) const {
6500 SelectionDAG &DAG = DCI.DAG;
6501
6502 // The number of bytes being extracted.
6503 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6504
6505 for (;;) {
6506 unsigned Opcode = Op.getOpcode();
6507 if (Opcode == ISD::BITCAST)
6508 // Look through bitcasts.
6509 Op = Op.getOperand(0);
6510 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
6511 canTreatAsByteVector(Op.getValueType())) {
6512 // Get a VPERM-like permute mask and see whether the bytes covered
6513 // by the extracted element are a contiguous sequence from one
6514 // source operand.
6515 SmallVector<int, SystemZ::VectorBytes> Bytes;
6516 if (!getVPermMask(Op, Bytes))
6517 break;
6518 int First;
6519 if (!getShuffleInput(Bytes, Index * BytesPerElement,
6520 BytesPerElement, First))
6521 break;
6522 if (First < 0)
6523 return DAG.getUNDEF(ResVT);
6524 // Make sure the contiguous sequence starts at a multiple of the
6525 // original element size.
6526 unsigned Byte = unsigned(First) % Bytes.size();
6527 if (Byte % BytesPerElement != 0)
6528 break;
6529 // We can get the extracted value directly from an input.
6530 Index = Byte / BytesPerElement;
6531 Op = Op.getOperand(unsigned(First) / Bytes.size());
6532 Force = true;
6533 } else if (Opcode == ISD::BUILD_VECTOR &&
6534 canTreatAsByteVector(Op.getValueType())) {
6535 // We can only optimize this case if the BUILD_VECTOR elements are
6536 // at least as wide as the extracted value.
6537 EVT OpVT = Op.getValueType();
6538 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6539 if (OpBytesPerElement < BytesPerElement)
6540 break;
6541 // Make sure that the least-significant bit of the extracted value
6542 // is the least significant bit of an input.
6543 unsigned End = (Index + 1) * BytesPerElement;
6544 if (End % OpBytesPerElement != 0)
6545 break;
6546 // We're extracting the low part of one operand of the BUILD_VECTOR.
6547 Op = Op.getOperand(End / OpBytesPerElement - 1);
6548 if (!Op.getValueType().isInteger()) {
6549 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
6550 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
6551 DCI.AddToWorklist(Op.getNode());
6552 }
6553 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
6554 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
6555 if (VT != ResVT) {
6556 DCI.AddToWorklist(Op.getNode());
6557 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
6558 }
6559 return Op;
6560 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6561 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
6562 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6563 canTreatAsByteVector(Op.getValueType()) &&
6564 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
6565 // Make sure that only the unextended bits are significant.
6566 EVT ExtVT = Op.getValueType();
6567 EVT OpVT = Op.getOperand(0).getValueType();
6568 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
6569 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6570 unsigned Byte = Index * BytesPerElement;
6571 unsigned SubByte = Byte % ExtBytesPerElement;
6572 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
6573 if (SubByte < MinSubByte ||
6574 SubByte + BytesPerElement > ExtBytesPerElement)
6575 break;
6576 // Get the byte offset of the unextended element
6577 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
6578 // ...then add the byte offset relative to that element.
6579 Byte += SubByte - MinSubByte;
6580 if (Byte % BytesPerElement != 0)
6581 break;
6582 Op = Op.getOperand(0);
6583 Index = Byte / BytesPerElement;
6584 Force = true;
6585 } else
6586 break;
6587 }
6588 if (Force) {
6589 if (Op.getValueType() != VecVT) {
6590 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
6591 DCI.AddToWorklist(Op.getNode());
6592 }
6593 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
6594 DAG.getConstant(Index, DL, MVT::i32));
6595 }
6596 return SDValue();
6597}
6598
6599// Optimize vector operations in scalar value Op on the basis that Op
6600// is truncated to TruncVT.
6601SDValue SystemZTargetLowering::combineTruncateExtract(
6602 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
6603 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
6604 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
6605 // of type TruncVT.
6606 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6607 TruncVT.getSizeInBits() % 8 == 0) {
6608 SDValue Vec = Op.getOperand(0);
6609 EVT VecVT = Vec.getValueType();
6610 if (canTreatAsByteVector(VecVT)) {
6611 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
6612 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6613 unsigned TruncBytes = TruncVT.getStoreSize();
6614 if (BytesPerElement % TruncBytes == 0) {
6615 // Calculate the value of Y' in the above description. We are
6616 // splitting the original elements into Scale equal-sized pieces
6617 // and for truncation purposes want the last (least-significant)
6618 // of these pieces for IndexN. This is easiest to do by calculating
6619 // the start index of the following element and then subtracting 1.
6620 unsigned Scale = BytesPerElement / TruncBytes;
6621 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
6622
6623 // Defer the creation of the bitcast from X to combineExtract,
6624 // which might be able to optimize the extraction.
6625 VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
6626 VecVT.getStoreSize() / TruncBytes);
6627 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
6628 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
6629 }
6630 }
6631 }
6632 }
6633 return SDValue();
6634}
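// Editorial sketch (not part of the original source): truncating
// (extract_vector_elt <2 x i64> X, 1) to i32 gives Scale = 2 and
// NewIndex = (1 + 1) * 2 - 1 = 3, i.e. the least-significant i32 piece of
// element 1, re-extracted from the <4 x i32> view of X by combineExtract.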
6635
6636SDValue SystemZTargetLowering::combineZERO_EXTEND(
6637 SDNode *N, DAGCombinerInfo &DCI) const {
6638 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
6639 SelectionDAG &DAG = DCI.DAG;
6640 SDValue N0 = N->getOperand(0);
6641 EVT VT = N->getValueType(0);
6642 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
6643 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
6644 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6645 if (TrueOp && FalseOp) {
6646 SDLoc DL(N0);
6647 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
6648 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
6649 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
6650 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
6651 // If N0 has multiple uses, change other uses as well.
6652 if (!N0.hasOneUse()) {
6653 SDValue TruncSelect =
6654 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
6655 DCI.CombineTo(N0.getNode(), TruncSelect);
6656 }
6657 return NewSelect;
6658 }
6659 }
6660 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
6661 // of the result is smaller than the size of X and all the truncated bits
6662 // of X are already zero.
6663 if (N0.getOpcode() == ISD::XOR &&
6664 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
6665 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6666 N0.getOperand(1).getOpcode() == ISD::Constant) {
6667 SDValue X = N0.getOperand(0).getOperand(0);
6668 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
6669 KnownBits Known = DAG.computeKnownBits(X);
6670 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
6671 N0.getValueSizeInBits(),
6672 VT.getSizeInBits());
6673 if (TruncatedBits.isSubsetOf(Known.Zero)) {
6674 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
6675 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
6676 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
6677 X, DAG.getConstant(Mask, SDLoc(N0), VT));
6678 }
6679 }
6680 }
6681
6682 return SDValue();
6683}
6684
6685SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
6686 SDNode *N, DAGCombinerInfo &DCI) const {
6687 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
6688 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
6689 // into (select_cc LHS, RHS, -1, 0, COND)
6690 SelectionDAG &DAG = DCI.DAG;
6691 SDValue N0 = N->getOperand(0);
6692 EVT VT = N->getValueType(0);
6693 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
6694 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
6695 N0 = N0.getOperand(0);
6696 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
6697 SDLoc DL(N0);
6698 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
6699 DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
6700 N0.getOperand(2) };
6701 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
6702 }
6703 return SDValue();
6704}
6705
6706SDValue SystemZTargetLowering::combineSIGN_EXTEND(
6707 SDNode *N, DAGCombinerInfo &DCI) const {
6708 // Convert (sext (ashr (shl X, C1), C2)) to
6709 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
6710 // cheap as narrower ones.
6711 SelectionDAG &DAG = DCI.DAG;
6712 SDValue N0 = N->getOperand(0);
6713 EVT VT = N->getValueType(0);
6714 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
6715 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6716 SDValue Inner = N0.getOperand(0);
6717 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
6718 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
6719 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
6720 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
6721 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
6722 EVT ShiftVT = N0.getOperand(1).getValueType();
6723 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
6724 Inner.getOperand(0));
6725 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
6726 DAG.getConstant(NewShlAmt, SDLoc(Inner),
6727 ShiftVT));
6728 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
6729 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
6730 }
6731 }
6732 }
6733
6734 return SDValue();
6735}
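// Editorial sketch (not part of the original source): for
// (sext i32->i64 (ashr (shl X, 24), 24)), Extra is 32, so the combine
// produces (sra (shl (anyext X), 56), 56) on i64, selecting the same
// sign-extended byte field with the wider (equally cheap) shifts.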
6736
6737SDValue SystemZTargetLowering::combineMERGE(
6738 SDNode *N, DAGCombinerInfo &DCI) const {
6739 SelectionDAG &DAG = DCI.DAG;
6740 unsigned Opcode = N->getOpcode();
6741 SDValue Op0 = N->getOperand(0);
6742 SDValue Op1 = N->getOperand(1);
6743 if (Op0.getOpcode() == ISD::BITCAST)
6744 Op0 = Op0.getOperand(0);
6745 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
6746 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
6747 // for v4f32.
6748 if (Op1 == N->getOperand(0))
6749 return Op1;
6750 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
6751 EVT VT = Op1.getValueType();
6752 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
6753 if (ElemBytes <= 4) {
6754 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
6755 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
6756 EVT InVT = VT.changeVectorElementTypeToInteger();
6757 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
6758 SystemZ::VectorBytes / ElemBytes / 2);
6759 if (VT != InVT) {
6760 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
6761 DCI.AddToWorklist(Op1.getNode());
6762 }
6763 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
6764 DCI.AddToWorklist(Op.getNode());
6765 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
6766 }
6767 }
6768 return SDValue();
6769}
6770
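// Combine LOAD nodes: split an i128 load whose value is only moved into GPRs
// into two separate i64 loads, and redirect the other uses of a REPLICATEd
// scalar load to the first element of the REPLICATE so that selection can
// emit a VLREP.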
6771SDValue SystemZTargetLowering::combineLOAD(
6772 SDNode *N, DAGCombinerInfo &DCI) const {
6773 SelectionDAG &DAG = DCI.DAG;
6774 EVT LdVT = N->getValueType(0);
6775 SDLoc DL(N);
6776
6777 // Replace an i128 load that is used solely to move its value into GPRs
6778 // by separate loads of both halves.
6779 if (LdVT == MVT::i128) {
6780 LoadSDNode *LD = cast<LoadSDNode>(N);
6781 if (!LD->isSimple() || !ISD::isNormalLoad(LD))
6782 return SDValue();
6783
6784 // Scan through all users.
6785 SmallVector<std::pair<SDNode *, int>, 2> Users;
6786 int UsedElements = 0;
6787 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6788 UI != UIEnd; ++UI) {
6789 // Skip the uses of the chain.
6790 if (UI.getUse().getResNo() != 0)
6791 continue;
6792
6793 // Verify every user is a TRUNCATE to i64 of the low or high half ...
6794 SDNode *User = *UI;
6795 int Index = 1;
6796 if (User->getOpcode() == ISD::SRL &&
6797 User->getOperand(1).getOpcode() == ISD::Constant &&
6798 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
6799 User = *User->use_begin();
6800 Index = 0;
6801 }
6802 if (User->getOpcode() != ISD::TRUNCATE ||
6803 User->getValueType(0) != MVT::i64)
6804 return SDValue();
6805
6806 // ... and no half is extracted twice.
6807 if (UsedElements & (1 << Index))
6808 return SDValue();
6809
6810 UsedElements |= 1 << Index;
6811 Users.push_back(std::make_pair(User, Index));
6812 }
6813
6814 // Rewrite each extraction as an independent load.
6815 SmallVector<SDValue, 2> ArgChains;
6816 for (auto UserAndIndex : Users) {
6817 SDNode *User = UserAndIndex.first;
6818 unsigned Offset = User->getValueType(0).getStoreSize() * UserAndIndex.second;
6819 SDValue Ptr =
6820 DAG.getMemBasePlusOffset(LD->getBasePtr(), TypeSize::getFixed(Offset), DL);
6821 SDValue EltLoad =
6822 DAG.getLoad(User->getValueType(0), DL, LD->getChain(), Ptr,
6823 LD->getPointerInfo().getWithOffset(Offset),
6824 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
6825 LD->getAAInfo());
6826
6827 DCI.CombineTo(User, EltLoad, true);
6828 ArgChains.push_back(EltLoad.getValue(1));
6829 }
6830
6831 // Collect all chains via TokenFactor.
6832 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
6833 ArgChains);
6834 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
6835 DCI.AddToWorklist(Chain.getNode());
6836 return SDValue(N, 0);
6837 }
6838
6839 if (LdVT.isVector() || LdVT.isInteger())
6840 return SDValue();
6841 // Transform a scalar load that is REPLICATEd and also has other use(s)
6842 // so that the other use(s) use the first element of the REPLICATE
6843 // instead of the load. Otherwise instruction selection will not produce
6844 // a VLREP. Avoid extracting to a GPR, so only do this for floating-point
6845 // loads.
6846
6847 SDValue Replicate;
6848 SmallVector<SDNode*, 8> OtherUses;
6849 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6850 UI != UE; ++UI) {
6851 if (UI->getOpcode() == SystemZISD::REPLICATE) {
6852 if (Replicate)
6853 return SDValue(); // Should never happen
6854 Replicate = SDValue(*UI, 0);
6855 }
6856 else if (UI.getUse().getResNo() == 0)
6857 OtherUses.push_back(*UI);
6858 }
6859 if (!Replicate || OtherUses.empty())
6860 return SDValue();
6861
6862 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
6863 Replicate, DAG.getConstant(0, DL, MVT::i32));
6864 // Update uses of the loaded Value while preserving old chains.
6865 for (SDNode *U : OtherUses) {
6866 SmallVector<SDValue, 8> Ops;
6867 for (SDValue Op : U->ops())
6868 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
6869 DAG.UpdateNodeOperands(U, Ops);
6870 }
6871 return SDValue(N, 0);
6872}
6873
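// Return true if a byte-swapping load or store can be used for the given
// type. The vector and i128 forms additionally require the
// vector-enhancements-2 facility.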
6874bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
6875 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
6876 return true;
6877 if (Subtarget.hasVectorEnhancements2())
6878 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
6879 return true;
6880 return false;
6881}
6882
6883static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
6884 if (!VT.isVector() || !VT.isSimple() ||
6885 VT.getSizeInBits() != 128 ||
6886 VT.getScalarSizeInBits() % 8 != 0)
6887 return false;
6888
6889 unsigned NumElts = VT.getVectorNumElements();
6890 for (unsigned i = 0; i < NumElts; ++i) {
6891 if (M[i] < 0) continue; // ignore UNDEF indices
6892 if ((unsigned) M[i] != NumElts - 1 - i)
6893 return false;
6894 }
6895
6896 return true;
6897}
6898
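// Return true if StoredVal is consumed only by stores with a round memory
// type of at most 16 bytes, possibly through splat BUILD_VECTORs that are
// themselves only stored, so that it can safely be replicated.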
6899static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
6900 for (auto *U : StoredVal->uses()) {
6901 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
6902 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
6903 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
6904 continue;
6905 } else if (isa<BuildVectorSDNode>(U)) {
6906 SDValue BuildVector = SDValue(U, 0);
6907 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
6908 isOnlyUsedByStores(BuildVector, DAG))
6909 continue;
6910 }
6911 return false;
6912 }
6913 return true;
6914}
6915
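// Return true if Val is an i128 value assembled from two i64 GPR halves,
// i.e. (or (zext LoPart), (shl (anyext HiPart), 64)), and return the two
// halves in LoPart and HiPart.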
6916static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) {
6917 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
6918 return false;
6919
6920 SDValue Op0 = Val.getOperand(0);
6921 SDValue Op1 = Val.getOperand(1);
6922
6923 if (Op0.getOpcode() == ISD::SHL)
6924 std::swap(Op0, Op1);
6925 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
6926 Op1.getOperand(1).getOpcode() != ISD::Constant ||
6927 Op1.getConstantOperandVal(1) != 64)
6928 return false;
6929 Op1 = Op1.getOperand(0);
6930
6931 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
6932 Op0.getOperand(0).getValueType() != MVT::i64)
6933 return false;
6934 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
6935 Op1.getOperand(0).getValueType() != MVT::i64)
6936 return false;
6937
6938 LoPart = Op0.getOperand(0);
6939 HiPart = Op1.getOperand(0);
6940 return true;
6941}
6942
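// Combine STORE nodes: extract vector elements for truncating stores, form
// byte-swapping (STRV*/VSTBR) and element-swapping (VSTER) stores, turn
// stores of READCYCLECOUNTER into STCKF, split i128 stores built from GPR
// pairs, and replicate stored values with VREP where profitable.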
6943SDValue SystemZTargetLowering::combineSTORE(
6944 SDNode *N, DAGCombinerInfo &DCI) const {
6945 SelectionDAG &DAG = DCI.DAG;
6946 auto *SN = cast<StoreSDNode>(N);
6947 auto &Op1 = N->getOperand(1);
6948 EVT MemVT = SN->getMemoryVT();
6949 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
6950 // for the extraction to be done on a vMiN value, so that we can use VSTE.
6951 // If X has wider elements then convert it to:
6952 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
6953 if (MemVT.isInteger() && SN->isTruncatingStore()) {
6954 if (SDValue Value =
6955 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
6956 DCI.AddToWorklist(Value.getNode());
6957
6958 // Rewrite the store with the new form of stored value.
6959 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
6960 SN->getBasePtr(), SN->getMemoryVT(),
6961 SN->getMemOperand());
6962 }
6963 }
6964 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
6965 if (!SN->isTruncatingStore() &&
6966 Op1.getOpcode() == ISD::BSWAP &&
6967 Op1.getNode()->hasOneUse() &&
6968 canLoadStoreByteSwapped(Op1.getValueType())) {
6969
6970 SDValue BSwapOp = Op1.getOperand(0);
6971
6972 if (BSwapOp.getValueType() == MVT::i16)
6973 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
6974
6975 SDValue Ops[] = {
6976 N->getOperand(0), BSwapOp, N->getOperand(2)
6977 };
6978
6979 return
6980 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
6981 Ops, MemVT, SN->getMemOperand());
6982 }
6983 // Combine STORE (element-swap) into VSTER
6984 if (!SN->isTruncatingStore() &&
6985 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
6986 Op1.getNode()->hasOneUse() &&
6987 Subtarget.hasVectorEnhancements2()) {
6988 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
6989 ArrayRef<int> ShuffleMask = SVN->getMask();
6990 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
6991 SDValue Ops[] = {
6992 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
6993 };
6994
6995 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
6996 DAG.getVTList(MVT::Other),
6997 Ops, MemVT, SN->getMemOperand());
6998 }
6999 }
7000
7001 // Combine STORE (READCYCLECOUNTER) into STCKF.
7002 if (!SN->isTruncatingStore() &&
7003 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
7004 Op1.hasOneUse() &&
7005 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
7006 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
7007 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
7008 DAG.getVTList(MVT::Other),
7009 Ops, MemVT, SN->getMemOperand());
7010 }
7011
7012 // Transform a store of an i128 moved from GPRs into two separate stores.
7013 if (MemVT == MVT::i128 && SN->isSimple() && ISD::isNormalStore(SN)) {
7014 SDValue LoPart, HiPart;
7015 if (isMovedFromParts(Op1, LoPart, HiPart)) {
7016 SDLoc DL(SN);
7017 SDValue Chain0 =
7018 DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),
7019 SN->getPointerInfo(), SN->getOriginalAlign(),
7020 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7021 SDValue Chain1 =
7022 DAG.getStore(SN->getChain(), DL, LoPart,
7023 DAG.getObjectPtrOffset(DL, SN->getBasePtr(),
7024 TypeSize::getFixed(8)),
7025 SN->getPointerInfo().getWithOffset(8),
7026 SN->getOriginalAlign(),
7027 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7028
7029 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
7030 }
7031 }
7032
7033 // Replicate a reg or immediate with VREP instead of scalar multiply or
7034 // immediate load. It seems best to do this during the first DAGCombine as
7035 // it is straightforward to handle the zero-extend node in the initial
7036 // DAG, and also not worry about keeping the new MemVT legal (e.g. when
7037 // extracting an i16 element from a v16i8 vector).
7038 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
7039 isOnlyUsedByStores(Op1, DAG)) {
7040 SDValue Word = SDValue();
7041 EVT WordVT;
7042
7043 // Find a replicated immediate; if one is found, return it in Word and
7044 // its type in WordVT.
7045 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
7046 // Some constants are better handled with a scalar store.
7047 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
7048 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
7049 return;
7050 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue()));
7051 if (VCI.isVectorConstantLegal(Subtarget) &&
7052 VCI.Opcode == SystemZISD::REPLICATE) {
7053 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
7054 WordVT = VCI.VecVT.getScalarType();
7055 }
7056 };
7057
7058 // Find a replicated register; if one is found, return it in Word and its
7059 // type in WordVT.
7060 auto FindReplicatedReg = [&](SDValue MulOp) {
7061 EVT MulVT = MulOp.getValueType();
7062 if (MulOp->getOpcode() == ISD::MUL &&
7063 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
7064 // Find a zero extended value and its type.
7065 SDValue LHS = MulOp->getOperand(0);
7066 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
7067 WordVT = LHS->getOperand(0).getValueType();
7068 else if (LHS->getOpcode() == ISD::AssertZext)
7069 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
7070 else
7071 return;
7072 // Find a replicating constant, e.g. 0x00010001.
7073 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
7074 SystemZVectorConstantInfo VCI(
7075 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
7076 if (VCI.isVectorConstantLegal(Subtarget) &&
7077 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
7078 WordVT == VCI.VecVT.getScalarType())
7079 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
7080 }
7081 }
7082 };
7083
7084 if (isa<BuildVectorSDNode>(Op1) &&
7085 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
7086 SDValue SplatVal = Op1->getOperand(0);
7087 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
7088 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
7089 else
7090 FindReplicatedReg(SplatVal);
7091 } else {
7092 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
7093 FindReplicatedImm(C, MemVT.getStoreSize());
7094 else
7095 FindReplicatedReg(Op1);
7096 }
7097
7098 if (Word != SDValue()) {
7099 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
7100 "Bad type handling");
7101 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
7102 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
7103 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
7104 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
7105 SN->getBasePtr(), SN->getMemOperand());
7106 }
7107 }
7108
7109 return SDValue();
7110}
7111
7112SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
7113 SDNode *N, DAGCombinerInfo &DCI) const {
7114 SelectionDAG &DAG = DCI.DAG;
7115 // Combine element-swap (LOAD) into VLER
7116 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7117 N->getOperand(0).hasOneUse() &&
7118 Subtarget.hasVectorEnhancements2()) {
7119 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
7120 ArrayRef<int> ShuffleMask = SVN->getMask();
7121 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
7122 SDValue Load = N->getOperand(0);
7123 LoadSDNode *LD = cast<LoadSDNode>(Load);
7124
7125 // Create the element-swapping load.
7126 SDValue Ops[] = {
7127 LD->getChain(), // Chain
7128 LD->getBasePtr() // Ptr
7129 };
7130 SDValue ESLoad =
7131 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
7132 DAG.getVTList(LD->getValueType(0), MVT::Other),
7133 Ops, LD->getMemoryVT(), LD->getMemOperand());
7134
7135 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
7136 // by the load dead.
7137 DCI.CombineTo(N, ESLoad);
7138
7139 // Next, combine the load away; we give it a bogus result value but a real
7140 // chain result. The result value is dead because the shuffle is dead.
7141 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
7142
7143 // Return N so it doesn't get rechecked!
7144 return SDValue(N, 0);
7145 }
7146 }
7147
7148 return SDValue();
7149}
7150
7151SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
7152 SDNode *N, DAGCombinerInfo &DCI) const {
7153 SelectionDAG &DAG = DCI.DAG;
7154
7155 if (!Subtarget.hasVector())
7156 return SDValue();
7157
7158 // Look through bitcasts that retain the number of vector elements.
7159 SDValue Op = N->getOperand(0);
7160 if (Op.getOpcode() == ISD::BITCAST &&
7161 Op.getValueType().isVector() &&
7162 Op.getOperand(0).getValueType().isVector() &&
7163 Op.getValueType().getVectorNumElements() ==
7164 Op.getOperand(0).getValueType().getVectorNumElements())
7165 Op = Op.getOperand(0);
7166
7167 // Pull BSWAP out of a vector extraction.
7168 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
7169 EVT VecVT = Op.getValueType();
7170 EVT EltVT = VecVT.getVectorElementType();
7171 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
7172 Op.getOperand(0), N->getOperand(1));
7173 DCI.AddToWorklist(Op.getNode());
7174 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
7175 if (EltVT != N->getValueType(0)) {
7176 DCI.AddToWorklist(Op.getNode());
7177 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
7178 }
7179 return Op;
7180 }
7181
7182 // Try to simplify a vector extraction.
7183 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
7184 SDValue Op0 = N->getOperand(0);
7185 EVT VecVT = Op0.getValueType();
7186 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
7187 IndexN->getZExtValue(), DCI, false);
7188 }
7189 return SDValue();
7190}
7191
7192SDValue SystemZTargetLowering::combineJOIN_DWORDS(
7193 SDNode *N, DAGCombinerInfo &DCI) const {
7194 SelectionDAG &DAG = DCI.DAG;
7195 // (join_dwords X, X) == (replicate X)
7196 if (N->getOperand(0) == N->getOperand(1))
7197 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
7198 N->getOperand(0));
7199 return SDValue();
7200}
7201
7202static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
7203 SDValue Chain1 = N1->getOperand(0);
7204 SDValue Chain2 = N2->getOperand(0);
7205
7206 // Trivial case: both nodes take the same chain.
7207 if (Chain1 == Chain2)
7208 return Chain1;
7209
7210 // FIXME - we could handle more complex cases via TokenFactor,
7211 // assuming we can verify that this would not create a cycle.
7212 return SDValue();
7213}
7214
7215SDValue SystemZTargetLowering::combineFP_ROUND(
7216 SDNode *N, DAGCombinerInfo &DCI) const {
7217
7218 if (!Subtarget.hasVector())
7219 return SDValue();
7220
7221 // (fpround (extract_vector_elt X 0))
7222 // (fpround (extract_vector_elt X 1)) ->
7223 // (extract_vector_elt (VROUND X) 0)
7224 // (extract_vector_elt (VROUND X) 2)
7225 //
7226 // This is a special case since the target doesn't really support v2f32s.
7227 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7228 SelectionDAG &DAG = DCI.DAG;
7229 SDValue Op0 = N->getOperand(OpNo);
7230 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
7231 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7232 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
7233 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7234 Op0.getConstantOperandVal(1) == 0) {
7235 SDValue Vec = Op0.getOperand(0);
7236 for (auto *U : Vec->uses()) {
7237 if (U != Op0.getNode() && U->hasOneUse() &&
7238 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7239 U->getOperand(0) == Vec &&
7240 U->getOperand(1).getOpcode() == ISD::Constant &&
7241 U->getConstantOperandVal(1) == 1) {
7242 SDValue OtherRound = SDValue(*U->use_begin(), 0);
7243 if (OtherRound.getOpcode() == N->getOpcode() &&
7244 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
7245 OtherRound.getValueType() == MVT::f32) {
7246 SDValue VRound, Chain;
7247 if (N->isStrictFPOpcode()) {
7248 Chain = MergeInputChains(N, OtherRound.getNode());
7249 if (!Chain)
7250 continue;
7251 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
7252 {MVT::v4f32, MVT::Other}, {Chain, Vec});
7253 Chain = VRound.getValue(1);
7254 } else
7255 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
7256 MVT::v4f32, Vec);
7257 DCI.AddToWorklist(VRound.getNode());
7258 SDValue Extract1 =
7259 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
7260 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
7261 DCI.AddToWorklist(Extract1.getNode());
7262 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
7263 if (Chain)
7264 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
7265 SDValue Extract0 =
7266 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
7267 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7268 if (Chain)
7269 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7270 N->getVTList(), Extract0, Chain);
7271 return Extract0;
7272 }
7273 }
7274 }
7275 }
7276 return SDValue();
7277}
7278
7279SDValue SystemZTargetLowering::combineFP_EXTEND(
7280 SDNode *N, DAGCombinerInfo &DCI) const {
7281
7282 if (!Subtarget.hasVector())
7283 return SDValue();
7284
7285 // (fpextend (extract_vector_elt X 0))
7286 // (fpextend (extract_vector_elt X 2)) ->
7287 // (extract_vector_elt (VEXTEND X) 0)
7288 // (extract_vector_elt (VEXTEND X) 1)
7289 //
7290 // This is a special case since the target doesn't really support v2f32s.
7291 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7292 SelectionDAG &DAG = DCI.DAG;
7293 SDValue Op0 = N->getOperand(OpNo);
7294 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
7295 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7296 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
7297 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7298 Op0.getConstantOperandVal(1) == 0) {
7299 SDValue Vec = Op0.getOperand(0);
7300 for (auto *U : Vec->uses()) {
7301 if (U != Op0.getNode() && U->hasOneUse() &&
7302 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7303 U->getOperand(0) == Vec &&
7304 U->getOperand(1).getOpcode() == ISD::Constant &&
7305 U->getConstantOperandVal(1) == 2) {
7306 SDValue OtherExtend = SDValue(*U->use_begin(), 0);
7307 if (OtherExtend.getOpcode() == N->getOpcode() &&
7308 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
7309 OtherExtend.getValueType() == MVT::f64) {
7310 SDValue VExtend, Chain;
7311 if (N->isStrictFPOpcode()) {
7312 Chain = MergeInputChains(N, OtherExtend.getNode());
7313 if (!Chain)
7314 continue;
7315 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
7316 {MVT::v2f64, MVT::Other}, {Chain, Vec});
7317 Chain = VExtend.getValue(1);
7318 } else
7319 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
7320 MVT::v2f64, Vec);
7321 DCI.AddToWorklist(VExtend.getNode());
7322 SDValue Extract1 =
7323 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
7324 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
7325 DCI.AddToWorklist(Extract1.getNode());
7326 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
7327 if (Chain)
7328 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
7329 SDValue Extract0 =
7330 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
7331 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7332 if (Chain)
7333 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7334 N->getVTList(), Extract0, Chain);
7335 return Extract0;
7336 }
7337 }
7338 }
7339 }
7340 return SDValue();
7341}
7342
7343SDValue SystemZTargetLowering::combineINT_TO_FP(
7344 SDNode *N, DAGCombinerInfo &DCI) const {
7345 if (DCI.Level != BeforeLegalizeTypes)
7346 return SDValue();
7347 SelectionDAG &DAG = DCI.DAG;
7348 LLVMContext &Ctx = *DAG.getContext();
7349 unsigned Opcode = N->getOpcode();
7350 EVT OutVT = N->getValueType(0);
7351 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
7352 SDValue Op = N->getOperand(0);
7353 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
7354 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
7355
7356 // Insert an extension before type-legalization to avoid scalarization, e.g.:
7357 // v2f64 = uint_to_fp v2i16
7358 // =>
7359 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
7360 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
7361 OutScalarBits <= 64) {
7362 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
7363 EVT ExtVT = EVT::getVectorVT(
7364 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
7365 unsigned ExtOpcode =
7366 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
7367 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
7368 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
7369 }
7370 return SDValue();
7371}
7372
7373SDValue SystemZTargetLowering::combineBSWAP(
7374 SDNode *N, DAGCombinerInfo &DCI) const {
7375 SelectionDAG &DAG = DCI.DAG;
7376 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
7377 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7378 N->getOperand(0).hasOneUse() &&
7379 canLoadStoreByteSwapped(N->getValueType(0))) {
7380 SDValue Load = N->getOperand(0);
7381 LoadSDNode *LD = cast<LoadSDNode>(Load);
7382
7383 // Create the byte-swapping load.
7384 SDValue Ops[] = {
7385 LD->getChain(), // Chain
7386 LD->getBasePtr() // Ptr
7387 };
7388 EVT LoadVT = N->getValueType(0);
7389 if (LoadVT == MVT::i16)
7390 LoadVT = MVT::i32;
7391 SDValue BSLoad =
7392 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
7393 DAG.getVTList(LoadVT, MVT::Other),
7394 Ops, LD->getMemoryVT(), LD->getMemOperand());
7395
7396 // If this is an i16 load, insert the truncate.
7397 SDValue ResVal = BSLoad;
7398 if (N->getValueType(0) == MVT::i16)
7399 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
7400
7401 // First, combine the bswap away. This makes the value produced by the
7402 // load dead.
7403 DCI.CombineTo(N, ResVal);
7404
7405 // Next, combine the load away; we give it a bogus result value but a real
7406 // chain result. The result value is dead because the bswap is dead.
7407 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
7408
7409 // Return N so it doesn't get rechecked!
7410 return SDValue(N, 0);
7411 }
7412
7413 // Look through bitcasts that retain the number of vector elements.
7414 SDValue Op = N->getOperand(0);
7415 if (Op.getOpcode() == ISD::BITCAST &&
7416 Op.getValueType().isVector() &&
7417 Op.getOperand(0).getValueType().isVector() &&
7418 Op.getValueType().getVectorNumElements() ==
7419 Op.getOperand(0).getValueType().getVectorNumElements())
7420 Op = Op.getOperand(0);
7421
7422 // Push BSWAP into a vector insertion if at least one side then simplifies.
7423 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
7424 SDValue Vec = Op.getOperand(0);
7425 SDValue Elt = Op.getOperand(1);
7426 SDValue Idx = Op.getOperand(2);
7427
7428 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
7429 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
7430 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
7431 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
7432 (canLoadStoreByteSwapped(N->getValueType(0)) &&
7433 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
7434 EVT VecVT = N->getValueType(0);
7435 EVT EltVT = N->getValueType(0).getVectorElementType();
7436 if (VecVT != Vec.getValueType()) {
7437 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
7438 DCI.AddToWorklist(Vec.getNode());
7439 }
7440 if (EltVT != Elt.getValueType()) {
7441 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
7442 DCI.AddToWorklist(Elt.getNode());
7443 }
7444 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
7445 DCI.AddToWorklist(Vec.getNode());
7446 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
7447 DCI.AddToWorklist(Elt.getNode());
7448 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
7449 Vec, Elt, Idx);
7450 }
7451 }
7452
7453 // Push BSWAP into a vector shuffle if at least one side then simplifies.
7454 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
7455 if (SV && Op.hasOneUse()) {
7456 SDValue Op0 = Op.getOperand(0);
7457 SDValue Op1 = Op.getOperand(1);
7458
7459 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
7460 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
7461 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
7462 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
7463 EVT VecVT = N->getValueType(0);
7464 if (VecVT != Op0.getValueType()) {
7465 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
7466 DCI.AddToWorklist(Op0.getNode());
7467 }
7468 if (VecVT != Op1.getValueType()) {
7469 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
7470 DCI.AddToWorklist(Op1.getNode());
7471 }
7472 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
7473 DCI.AddToWorklist(Op0.getNode());
7474 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
7475 DCI.AddToWorklist(Op1.getNode());
7476 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
7477 }
7478 }
7479
7480 return SDValue();
7481}
7482
7483static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
7484 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
7485 // set by the CCReg instruction using the CCValid / CCMask masks.
7486 // If the CCReg instruction is itself an ICMP testing the condition
7487 // code set by some other instruction, see whether we can directly
7488 // use that condition code.
7489
7490 // Verify that we have an ICMP against some constant.
7491 if (CCValid != SystemZ::CCMASK_ICMP)
7492 return false;
7493 auto *ICmp = CCReg.getNode();
7494 if (ICmp->getOpcode() != SystemZISD::ICMP)
7495 return false;
7496 auto *CompareLHS = ICmp->getOperand(0).getNode();
7497 auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
7498 if (!CompareRHS)
7499 return false;
7500
7501 // Optimize the case where CompareLHS is a SELECT_CCMASK.
7502 if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
7503 // Verify that we have an appropriate mask for an EQ or NE comparison.
7504 bool Invert = false;
7505 if (CCMask == SystemZ::CCMASK_CMP_NE)
7506 Invert = !Invert;
7507 else if (CCMask != SystemZ::CCMASK_CMP_EQ)
7508 return false;
7509
7510 // Verify that the ICMP compares against one of the select values.
7511 auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
7512 if (!TrueVal)
7513 return false;
7514 auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7515 if (!FalseVal)
7516 return false;
7517 if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
7518 Invert = !Invert;
7519 else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
7520 return false;
7521
7522 // Compute the effective CC mask for the new branch or select.
7523 auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
7524 auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
7525 if (!NewCCValid || !NewCCMask)
7526 return false;
7527 CCValid = NewCCValid->getZExtValue();
7528 CCMask = NewCCMask->getZExtValue();
7529 if (Invert)
7530 CCMask ^= CCValid;
7531
7532 // Return the updated CCReg link.
7533 CCReg = CompareLHS->getOperand(4);
7534 return true;
7535 }
7536
7537 // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
7538 if (CompareLHS->getOpcode() == ISD::SRA) {
7539 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7540 if (!SRACount || SRACount->getZExtValue() != 30)
7541 return false;
7542 auto *SHL = CompareLHS->getOperand(0).getNode();
7543 if (SHL->getOpcode() != ISD::SHL)
7544 return false;
7545 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
7546 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
7547 return false;
7548 auto *IPM = SHL->getOperand(0).getNode();
7549 if (IPM->getOpcode() != SystemZISD::IPM)
7550 return false;
7551
7552 // Avoid introducing CC spills (because SRA would clobber CC).
7553 if (!CompareLHS->hasOneUse())
7554 return false;
7555 // Verify that the ICMP compares against zero.
7556 if (CompareRHS->getZExtValue() != 0)
7557 return false;
7558
7559 // Compute the effective CC mask for the new branch or select.
7560 CCMask = SystemZ::reverseCCMask(CCMask);
7561
7562 // Return the updated CCReg link.
7563 CCReg = IPM->getOperand(0);
7564 return true;
7565 }
7566
7567 return false;
7568}
7569
7570SDValue SystemZTargetLowering::combineBR_CCMASK(
7571 SDNode *N, DAGCombinerInfo &DCI) const {
7572 SelectionDAG &DAG = DCI.DAG;
7573
7574 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
7575 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7576 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7577 if (!CCValid || !CCMask)
7578 return SDValue();
7579
7580 int CCValidVal = CCValid->getZExtValue();
7581 int CCMaskVal = CCMask->getZExtValue();
7582 SDValue Chain = N->getOperand(0);
7583 SDValue CCReg = N->getOperand(4);
7584
7585 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7586 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
7587 Chain,
7588 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7589 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7590 N->getOperand(3), CCReg);
7591 return SDValue();
7592}
7593
7594SDValue SystemZTargetLowering::combineSELECT_CCMASK(
7595 SDNode *N, DAGCombinerInfo &DCI) const {
7596 SelectionDAG &DAG = DCI.DAG;
7597
7598 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
7599 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
7600 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
7601 if (!CCValid || !CCMask)
7602 return SDValue();
7603
7604 int CCValidVal = CCValid->getZExtValue();
7605 int CCMaskVal = CCMask->getZExtValue();
7606 SDValue CCReg = N->getOperand(4);
7607
7608 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7609 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
7610 N->getOperand(0), N->getOperand(1),
7611 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7612 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7613 CCReg);
7614 return SDValue();
7615}
7616
7617
7618SDValue SystemZTargetLowering::combineGET_CCMASK(
7619 SDNode *N, DAGCombinerInfo &DCI) const {
7620
7621 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
7622 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7623 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7624 if (!CCValid || !CCMask)
7625 return SDValue();
7626 int CCValidVal = CCValid->getZExtValue();
7627 int CCMaskVal = CCMask->getZExtValue();
7628
7629 SDValue Select = N->getOperand(0);
7630 if (Select->getOpcode() == ISD::TRUNCATE)
7631 Select = Select->getOperand(0);
7632 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
7633 return SDValue();
7634
7635 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
7636 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
7637 if (!SelectCCValid || !SelectCCMask)
7638 return SDValue();
7639 int SelectCCValidVal = SelectCCValid->getZExtValue();
7640 int SelectCCMaskVal = SelectCCMask->getZExtValue();
7641
7642 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
7643 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
7644 if (!TrueVal || !FalseVal)
7645 return SDValue();
7646 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
7647 ;
7648 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
7649 SelectCCMaskVal ^= SelectCCValidVal;
7650 else
7651 return SDValue();
7652
7653 if (SelectCCValidVal & ~CCValidVal)
7654 return SDValue();
7655 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
7656 return SDValue();
7657
7658 return Select->getOperand(4);
7659}
7660
7661SDValue SystemZTargetLowering::combineIntDIVREM(
7662 SDNode *N, DAGCombinerInfo &DCI) const {
7663 SelectionDAG &DAG = DCI.DAG;
7664 EVT VT = N->getValueType(0);
7665 // In the case where the divisor is a vector of constants, a cheaper
7666 // sequence of instructions can replace the divide. BuildSDIV is called to
7667 // do this during DAG combining, but it only succeeds when it can build a
7668 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
7669 // since it is not Legal but Custom it can only happen before
7670 // legalization. Therefore we must scalarize this early, before the first
7671 // DAG combine. For widened vectors, this is already the result of type legalization.
7672 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
7673 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
7674 return DAG.UnrollVectorOp(N);
7675 return SDValue();
7676}
7677
7678SDValue SystemZTargetLowering::combineINTRINSIC(
7679 SDNode *N, DAGCombinerInfo &DCI) const {
7680 SelectionDAG &DAG = DCI.DAG;
7681
7682 unsigned Id = N->getConstantOperandVal(1);
7683 switch (Id) {
7684 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
7685 // or larger is simply a vector load.
7686 case Intrinsic::s390_vll:
7687 case Intrinsic::s390_vlrl:
7688 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
7689 if (C->getZExtValue() >= 15)
7690 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
7691 N->getOperand(3), MachinePointerInfo());
7692 break;
7693 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
7694 case Intrinsic::s390_vstl:
7695 case Intrinsic::s390_vstrl:
7696 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
7697 if (C->getZExtValue() >= 15)
7698 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
7699 N->getOperand(4), MachinePointerInfo());
7700 break;
7701 }
7702
7703 return SDValue();
7704}
7705
7706SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
7707 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
7708 return N->getOperand(0);
7709 return N;
7710}
7711
7712SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
7713 DAGCombinerInfo &DCI) const {
7714 switch(N->getOpcode()) {
7715 default: break;
7716 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
7717 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
7718 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
7719 case SystemZISD::MERGE_HIGH:
7720 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
7721 case ISD::LOAD: return combineLOAD(N, DCI);
7722 case ISD::STORE: return combineSTORE(N, DCI);
7723 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
7724 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
7725 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
7726 case ISD::STRICT_FP_ROUND:
7727 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
7728 case ISD::STRICT_FP_EXTEND:
7729 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
7730 case ISD::SINT_TO_FP:
7731 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
7732 case ISD::BSWAP: return combineBSWAP(N, DCI);
7733 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
7734 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
7735 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
7736 case ISD::SDIV:
7737 case ISD::UDIV:
7738 case ISD::SREM:
7739 case ISD::UREM: return combineIntDIVREM(N, DCI);
7740 case ISD::INTRINSIC_W_CHAIN:
7741 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
7742 }
7743
7744 return SDValue();
7745}
7746
7747// Return the demanded elements for the OpNo source operand of Op. DemandedElts
7748// are for Op.
7749static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
7750 unsigned OpNo) {
7751 EVT VT = Op.getValueType();
7752 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
7753 APInt SrcDemE;
7754 unsigned Opcode = Op.getOpcode();
7755 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7756 unsigned Id = Op.getConstantOperandVal(0);
7757 switch (Id) {
7758 case Intrinsic::s390_vpksh: // PACKS
7759 case Intrinsic::s390_vpksf:
7760 case Intrinsic::s390_vpksg:
7761 case Intrinsic::s390_vpkshs: // PACKS_CC
7762 case Intrinsic::s390_vpksfs:
7763 case Intrinsic::s390_vpksgs:
7764 case Intrinsic::s390_vpklsh: // PACKLS
7765 case Intrinsic::s390_vpklsf:
7766 case Intrinsic::s390_vpklsg:
7767 case Intrinsic::s390_vpklshs: // PACKLS_CC
7768 case Intrinsic::s390_vpklsfs:
7769 case Intrinsic::s390_vpklsgs:
7770 // VECTOR PACK truncates the elements of two source vectors into one.
7771 SrcDemE = DemandedElts;
7772 if (OpNo == 2)
7773 SrcDemE.lshrInPlace(NumElts / 2);
7774 SrcDemE = SrcDemE.trunc(NumElts / 2);
7775 break;
7776 // VECTOR UNPACK extends half the elements of the source vector.
7777 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7778 case Intrinsic::s390_vuphh:
7779 case Intrinsic::s390_vuphf:
7780 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7781 case Intrinsic::s390_vuplhh:
7782 case Intrinsic::s390_vuplhf:
7783 SrcDemE = APInt(NumElts * 2, 0);
7784 SrcDemE.insertBits(DemandedElts, 0);
7785 break;
7786 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7787 case Intrinsic::s390_vuplhw:
7788 case Intrinsic::s390_vuplf:
7789 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7790 case Intrinsic::s390_vupllh:
7791 case Intrinsic::s390_vupllf:
7792 SrcDemE = APInt(NumElts * 2, 0);
7793 SrcDemE.insertBits(DemandedElts, NumElts);
7794 break;
7795 case Intrinsic::s390_vpdi: {
7796 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
7797 SrcDemE = APInt(NumElts, 0);
7798 if (!DemandedElts[OpNo - 1])
7799 break;
7800 unsigned Mask = Op.getConstantOperandVal(3);
7801 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
7802 // Demand input element 0 or 1, given by the mask bit value.
7803 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
7804 break;
7805 }
7806 case Intrinsic::s390_vsldb: {
7807 // VECTOR SHIFT LEFT DOUBLE BY BYTE
7808 assert(VT == MVT::v16i8 && "Unexpected type.");
7809 unsigned FirstIdx = Op.getConstantOperandVal(3);
7810 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
7811 unsigned NumSrc0Els = 16 - FirstIdx;
7812 SrcDemE = APInt(NumElts, 0);
7813 if (OpNo == 1) {
7814 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
7815 SrcDemE.insertBits(DemEls, FirstIdx);
7816 } else {
7817 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
7818 SrcDemE.insertBits(DemEls, 0);
7819 }
7820 break;
7821 }
7822 case Intrinsic::s390_vperm:
7823 SrcDemE = APInt(NumElts, -1);
7824 break;
7825 default:
7826 llvm_unreachable("Unhandled intrinsic.");
7827 break;
7828 }
7829 } else {
7830 switch (Opcode) {
7831 case SystemZISD::JOIN_DWORDS:
7832 // Scalar operand.
7833 SrcDemE = APInt(1, 1);
7834 break;
7835 case SystemZISD::SELECT_CCMASK:
7836 SrcDemE = DemandedElts;
7837 break;
7838 default:
7839 llvm_unreachable("Unhandled opcode.");
7840 break;
7841 }
7842 }
7843 return SrcDemE;
7844}
7845
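// Compute the known bits of a node with two vector source operands starting
// at operand OpNo, intersecting what is known about the demanded elements of
// each source.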
7846static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
7847 const APInt &DemandedElts,
7848 const SelectionDAG &DAG, unsigned Depth,
7849 unsigned OpNo) {
7850 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
7851 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
7852 KnownBits LHSKnown =
7853 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
7854 KnownBits RHSKnown =
7855 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
7856 Known = LHSKnown.intersectWith(RHSKnown);
7857}
7858
7859void
7860SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
7861 KnownBits &Known,
7862 const APInt &DemandedElts,
7863 const SelectionDAG &DAG,
7864 unsigned Depth) const {
7865 Known.resetAll();
7866
7867 // Intrinsic CC result is returned in the two low bits.
7868 unsigned tmp0, tmp1; // not used
7869 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
7870 Known.Zero.setBitsFrom(2);
7871 return;
7872 }
7873 EVT VT = Op.getValueType();
7874 if (Op.getResNo() != 0 || VT == MVT::Untyped)
7875 return;
7876 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
7877 "KnownBits does not match VT in bitwidth");
7878 assert ((!VT.isVector() ||
7879 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
7880 "DemandedElts does not match VT number of elements");
7881 unsigned BitWidth = Known.getBitWidth();
7882 unsigned Opcode = Op.getOpcode();
7883 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7884 bool IsLogical = false;
7885 unsigned Id = Op.getConstantOperandVal(0);
7886 switch (Id) {
7887 case Intrinsic::s390_vpksh: // PACKS
7888 case Intrinsic::s390_vpksf:
7889 case Intrinsic::s390_vpksg:
7890 case Intrinsic::s390_vpkshs: // PACKS_CC
7891 case Intrinsic::s390_vpksfs:
7892 case Intrinsic::s390_vpksgs:
7893 case Intrinsic::s390_vpklsh: // PACKLS
7894 case Intrinsic::s390_vpklsf:
7895 case Intrinsic::s390_vpklsg:
7896 case Intrinsic::s390_vpklshs: // PACKLS_CC
7897 case Intrinsic::s390_vpklsfs:
7898 case Intrinsic::s390_vpklsgs:
7899 case Intrinsic::s390_vpdi:
7900 case Intrinsic::s390_vsldb:
7901 case Intrinsic::s390_vperm:
7902 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
7903 break;
7904 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7905 case Intrinsic::s390_vuplhh:
7906 case Intrinsic::s390_vuplhf:
7907 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7908 case Intrinsic::s390_vupllh:
7909 case Intrinsic::s390_vupllf:
7910 IsLogical = true;
7911 [[fallthrough]];
7912 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7913 case Intrinsic::s390_vuphh:
7914 case Intrinsic::s390_vuphf:
7915 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7916 case Intrinsic::s390_vuplhw:
7917 case Intrinsic::s390_vuplf: {
7918 SDValue SrcOp = Op.getOperand(1);
7919 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
7920 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
7921 if (IsLogical) {
7922 Known = Known.zext(BitWidth);
7923 } else
7924 Known = Known.sext(BitWidth);
7925 break;
7926 }
7927 default:
7928 break;
7929 }
7930 } else {
7931 switch (Opcode) {
7932 case SystemZISD::JOIN_DWORDS:
7933 case SystemZISD::SELECT_CCMASK:
7934 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
7935 break;
7936 case SystemZISD::REPLICATE: {
7937 SDValue SrcOp = Op.getOperand(0);
7938 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
7939 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
7940 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
7941 break;
7942 }
7943 default:
7944 break;
7945 }
7946 }
7947
7948 // Known has the width of the source operand(s). Adjust if needed to match
7949 // the passed bitwidth.
7950 if (Known.getBitWidth() != BitWidth)
7951 Known = Known.anyextOrTrunc(BitWidth);
7952}
7953
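// Compute the number of known sign bits of a node with two vector source
// operands starting at operand OpNo, taking the minimum over both sources and
// accounting for packing operations that drop high-order source bits.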
7954static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
7955 const SelectionDAG &DAG, unsigned Depth,
7956 unsigned OpNo) {
7957 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
7958 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
7959 if (LHS == 1) return 1; // Early out.
7960 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
7961 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
7962 if (RHS == 1) return 1; // Early out.
7963 unsigned Common = std::min(LHS, RHS);
7964 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
7965 EVT VT = Op.getValueType();
7966 unsigned VTBits = VT.getScalarSizeInBits();
7967 if (SrcBitWidth > VTBits) { // PACK
7968 unsigned SrcExtraBits = SrcBitWidth - VTBits;
7969 if (Common > SrcExtraBits)
7970 return (Common - SrcExtraBits);
7971 return 1;
7972 }
7973 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
7974 return Common;
7975}
7976
7977unsigned
7978SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
7979 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
7980 unsigned Depth) const {
7981 if (Op.getResNo() != 0)
7982 return 1;
7983 unsigned Opcode = Op.getOpcode();
7984 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7985 unsigned Id = Op.getConstantOperandVal(0);
7986 switch (Id) {
7987 case Intrinsic::s390_vpksh: // PACKS
7988 case Intrinsic::s390_vpksf:
7989 case Intrinsic::s390_vpksg:
7990 case Intrinsic::s390_vpkshs: // PACKS_CC
7991 case Intrinsic::s390_vpksfs:
7992 case Intrinsic::s390_vpksgs:
7993 case Intrinsic::s390_vpklsh: // PACKLS
7994 case Intrinsic::s390_vpklsf:
7995 case Intrinsic::s390_vpklsg:
7996 case Intrinsic::s390_vpklshs: // PACKLS_CC
7997 case Intrinsic::s390_vpklsfs:
7998 case Intrinsic::s390_vpklsgs:
7999 case Intrinsic::s390_vpdi:
8000 case Intrinsic::s390_vsldb:
8001 case Intrinsic::s390_vperm:
8002 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
8003 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8004 case Intrinsic::s390_vuphh:
8005 case Intrinsic::s390_vuphf:
8006 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8007 case Intrinsic::s390_vuplhw:
8008 case Intrinsic::s390_vuplf: {
8009 SDValue PackedOp = Op.getOperand(1);
8010 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
8011 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
8012 EVT VT = Op.getValueType();
8013 unsigned VTBits = VT.getScalarSizeInBits();
8014 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
8015 return Tmp;
8016 }
8017 default:
8018 break;
8019 }
8020 } else {
8021 switch (Opcode) {
8022 case SystemZISD::SELECT_CCMASK:
8023 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
8024 default:
8025 break;
8026 }
8027 }
8028
8029 return 1;
8030}
8031
8032bool SystemZTargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
8033 SDValue Op,
8034 const APInt &DemandedElts, const SelectionDAG &DAG,
8035 bool PoisonOnly, unsigned Depth) const {
8036 switch (Op->getOpcode()) {
8037 case SystemZISD::PCREL_WRAPPER:
8038 case SystemZISD::PCREL_OFFSET:
8039 return true;
8040 }
8041 return false;
8042}
8043
8044unsigned
8045SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
8046 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
8047 unsigned StackAlign = TFI->getStackAlignment();
8048 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
8049 "Unexpected stack alignment");
8050 // The default stack probe size is 4096 if the function has no
8051 // stack-probe-size attribute.
8052 unsigned StackProbeSize =
8053 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
8054 // Round down to the stack alignment.
8055 StackProbeSize &= ~(StackAlign - 1);
8056 return StackProbeSize ? StackProbeSize : StackAlign;
8057}
8058
8059//===----------------------------------------------------------------------===//
8060// Custom insertion
8061//===----------------------------------------------------------------------===//
8062
8063// Force base value Base into a register before MI. Return the register.
8064static Register forceReg(MachineInstr &MI, MachineOperand &Base,
8065 const SystemZInstrInfo *TII) {
8066 MachineBasicBlock *MBB = MI.getParent();
8067 MachineFunction &MF = *MBB->getParent();
8068 MachineRegisterInfo &MRI = MF.getRegInfo();
8069
8070 if (Base.isReg()) {
8071 // Copy Base into a new virtual register to help register coalescing in
8072 // cases with multiple uses.
8073 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8074 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
8075 .add(Base);
8076 return Reg;
8077 }
8078
8079 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8080 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
8081 .add(Base)
8082 .addImm(0)
8083 .addReg(0);
8084 return Reg;
8085}
8086
8087// The CC operand of MI might be missing a kill marker because there
8088// were multiple uses of CC, and ISel didn't know which to mark.
8089// Figure out whether MI should have had a kill marker.
8090static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
8091 // Scan forward through BB for a use/def of CC.
8092 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
8093 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
8094 const MachineInstr& mi = *miI;
8095 if (mi.readsRegister(SystemZ::CC))
8096 return false;
8097 if (mi.definesRegister(SystemZ::CC))
8098 break; // Should have kill-flag - update below.
8099 }
8100
8101 // If we hit the end of the block, check whether CC is live into a
8102 // successor.
8103 if (miI == MBB->end()) {
8104 for (const MachineBasicBlock *Succ : MBB->successors())
8105 if (Succ->isLiveIn(SystemZ::CC))
8106 return false;
8107 }
8108
8109 return true;
8110}
8111
8112// Return true if it is OK for this Select pseudo-opcode to be cascaded
8113// together with other Select pseudo-opcodes into a single basic-block with
8114// a conditional jump around it.
8115static bool isSelectPseudo(MachineInstr &MI) {
8116 switch (MI.getOpcode()) {
8117 case SystemZ::Select32:
8118 case SystemZ::Select64:
8119 case SystemZ::Select128:
8120 case SystemZ::SelectF32:
8121 case SystemZ::SelectF64:
8122 case SystemZ::SelectF128:
8123 case SystemZ::SelectVR32:
8124 case SystemZ::SelectVR64:
8125 case SystemZ::SelectVR128:
8126 return true;
8127
8128 default:
8129 return false;
8130 }
8131}
8132
8133// Helper function, which inserts PHI functions into SinkMBB:
8134// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
8135// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
8136static void createPHIsForSelects(SmallVectorImpl<MachineInstr *> &Selects,
8137 MachineBasicBlock *TrueMBB,
8138 MachineBasicBlock *FalseMBB,
8139 MachineBasicBlock *SinkMBB) {
8140 MachineFunction *MF = TrueMBB->getParent();
8141 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
8142
8143 MachineInstr *FirstMI = Selects.front();
8144 unsigned CCValid = FirstMI->getOperand(3).getImm();
8145 unsigned CCMask = FirstMI->getOperand(4).getImm();
8146
8147 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
8148
8149 // As we are creating the PHIs, we have to be careful if there is more than
8150 // one. Later Selects may reference the results of earlier Selects, but later
8151 // PHIs have to reference the individual true/false inputs from earlier PHIs.
8152 // That also means that PHI construction must work forward from earlier to
8153 // later, and that the code must maintain a mapping from each earlier PHI's
8154 // destination register to the registers that went into that PHI.
8155 DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
8156
8157 for (auto *MI : Selects) {
8158 Register DestReg = MI->getOperand(0).getReg();
8159 Register TrueReg = MI->getOperand(1).getReg();
8160 Register FalseReg = MI->getOperand(2).getReg();
8161
8162 // If this Select we are generating is the opposite condition from
8163 // the jump we generated, then we have to swap the operands for the
8164 // PHI that is going to be generated.
8165 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
8166 std::swap(TrueReg, FalseReg);
8167
8168 if (RegRewriteTable.contains(TrueReg))
8169 TrueReg = RegRewriteTable[TrueReg].first;
8170
8171 if (RegRewriteTable.contains(FalseReg))
8172 FalseReg = RegRewriteTable[FalseReg].second;
8173
8174 DebugLoc DL = MI->getDebugLoc();
8175 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
8176 .addReg(TrueReg).addMBB(TrueMBB)
8177 .addReg(FalseReg).addMBB(FalseMBB);
8178
8179 // Add this PHI to the rewrite table.
8180 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
8181 }
8182
8184}
8185
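// Implement EmitInstrWithCustomInserter for the ADJSTACKDOWN/ADJSTACKUP
// call-frame pseudos. These are no-ops here because the call frame is
// statically reserved in the prologue; the pseudo is simply erased after
// recording the call frame size.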
8186MachineBasicBlock *
8187SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
8188 MachineBasicBlock *BB) const {
8189 MachineFunction &MF = *BB->getParent();
8190 MachineFrameInfo &MFI = MF.getFrameInfo();
8191 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
8192 assert(TFL->hasReservedCallFrame(MF) &&
8193 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
8194 (void)TFL;
8195 // Get the MaxCallFrameSize value and erase MI since it serves no further
8196 // purpose as the call frame is statically reserved in the prolog. Set
8197 // AdjustsStack as MI is *not* mapped as a frame instruction.
8198 uint32_t NumBytes = MI.getOperand(0).getImm();
8199 if (NumBytes > MFI.getMaxCallFrameSize())
8200 MFI.setMaxCallFrameSize(NumBytes);
8201 MFI.setAdjustsStack(true);
8202
8203 MI.eraseFromParent();
8204 return BB;
8205}
8206
8207// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
8208MachineBasicBlock *
8209SystemZTargetLowering::emitSelect(MachineInstr &MI,
8210 MachineBasicBlock *MBB) const {
8211 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
8212 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8213
8214 unsigned CCValid = MI.getOperand(3).getImm();
8215 unsigned CCMask = MI.getOperand(4).getImm();
8216
8217 // If we have a sequence of Select* pseudo instructions using the
8218 // same condition code value, we want to expand all of them into
8219 // a single pair of basic blocks using the same condition.
8220 SmallVector<MachineInstr *, 8> Selects;
8221 SmallVector<MachineInstr *, 8> DbgValues;
8222 Selects.push_back(&MI);
8223 unsigned Count = 0;
8224 for (MachineInstr &NextMI : llvm::make_range(
8225 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
8226 if (isSelectPseudo(NextMI)) {
8227 assert(NextMI.getOperand(3).getImm() == CCValid &&
8228 "Bad CCValid operands since CC was not redefined.");
8229 if (NextMI.getOperand(4).getImm() == CCMask ||
8230 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
8231 Selects.push_back(&NextMI);
8232 continue;
8233 }
8234 break;
8235 }
8236 if (NextMI.definesRegister(SystemZ::CC) || NextMI.usesCustomInsertionHook())
8237 break;
8238 bool User = false;
8239 for (auto *SelMI : Selects)
8240 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
8241 User = true;
8242 break;
8243 }
8244 if (NextMI.isDebugInstr()) {
8245 if (User) {
8246 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
8247 DbgValues.push_back(&NextMI);
8248 }
8249 } else if (User || ++Count > 20)
8250 break;
8251 }
8252
8253 MachineInstr *LastMI = Selects.back();
8254 bool CCKilled =
8255 (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB));
8256 MachineBasicBlock *StartMBB = MBB;
8257 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
8258 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8259
8260 // Unless CC was killed in the last Select instruction, mark it as
8261 // live-in to both FalseMBB and JoinMBB.
8262 if (!CCKilled) {
8263 FalseMBB->addLiveIn(SystemZ::CC);
8264 JoinMBB->addLiveIn(SystemZ::CC);
8265 }
8266
8267 // StartMBB:
8268 // BRC CCMask, JoinMBB
8269 // # fallthrough to FalseMBB
8270 MBB = StartMBB;
8271 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8272 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8273 MBB->addSuccessor(JoinMBB);
8274 MBB->addSuccessor(FalseMBB);
8275
8276 // FalseMBB:
8277 // # fallthrough to JoinMBB
8278 MBB = FalseMBB;
8279 MBB->addSuccessor(JoinMBB);
8280
8281 // JoinMBB:
8282 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
8283 // ...
8284 MBB = JoinMBB;
8285 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
8286 for (auto *SelMI : Selects)
8287 SelMI->eraseFromParent();
8288
8289 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
8290 for (auto *DbgMI : DbgValues)
8291 MBB->splice(InsertPos, StartMBB, DbgMI);
8292
8293 return JoinMBB;
8294}
8295
8296// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
8297// StoreOpcode is the store to use and Invert says whether the store should
8298// happen when the condition is false rather than true. If a STORE ON
8299// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
8300MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
8301 MachineBasicBlock *MBB,
8302 unsigned StoreOpcode,
8303 unsigned STOCOpcode,
8304 bool Invert) const {
8305 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8306
8307 Register SrcReg = MI.getOperand(0).getReg();
8308 MachineOperand Base = MI.getOperand(1);
8309 int64_t Disp = MI.getOperand(2).getImm();
8310 Register IndexReg = MI.getOperand(3).getReg();
8311 unsigned CCValid = MI.getOperand(4).getImm();
8312 unsigned CCMask = MI.getOperand(5).getImm();
8313 DebugLoc DL = MI.getDebugLoc();
8314
8315 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
8316
8317 // ISel pattern matching also adds a load memory operand of the same
8318 // address, so take special care to find the storing memory operand.
8319 MachineMemOperand *MMO = nullptr;
8320 for (auto *I : MI.memoperands())
8321 if (I->isStore()) {
8322 MMO = I;
8323 break;
8324 }
8325
8326 // Use STOCOpcode if possible. We could use different store patterns in
8327 // order to avoid matching the index register, but the performance trade-offs
8328 // might be more complicated in that case.
8329 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
8330 if (Invert)
8331 CCMask ^= CCValid;
8332
8333 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
8334 .addReg(SrcReg)
8335 .add(Base)
8336 .addImm(Disp)
8337 .addImm(CCValid)
8338 .addImm(CCMask)
8339 .addMemOperand(MMO);
8340
8341 MI.eraseFromParent();
8342 return MBB;
8343 }
8344
8345 // Get the condition needed to branch around the store.
8346 if (!Invert)
8347 CCMask ^= CCValid;
8348
8349 MachineBasicBlock *StartMBB = MBB;
8350 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
8351 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8352
8353 // Unless CC was killed in the CondStore instruction, mark it as
8354 // live-in to both FalseMBB and JoinMBB.
8355 if (!MI.killsRegister(SystemZ::CC) && !checkCCKill(MI, JoinMBB)) {
8356 FalseMBB->addLiveIn(SystemZ::CC);
8357 JoinMBB->addLiveIn(SystemZ::CC);
8358 }
8359
8360 // StartMBB:
8361 // BRC CCMask, JoinMBB
8362 // # fallthrough to FalseMBB
8363 MBB = StartMBB;
8364 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8365 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8366 MBB->addSuccessor(JoinMBB);
8367 MBB->addSuccessor(FalseMBB);
8368
8369 // FalseMBB:
8370 // store %SrcReg, %Disp(%Index,%Base)
8371 // # fallthrough to JoinMBB
8372 MBB = FalseMBB;
8373 BuildMI(MBB, DL, TII->get(StoreOpcode))
8374 .addReg(SrcReg)
8375 .add(Base)
8376 .addImm(Disp)
8377 .addReg(IndexReg)
8378 .addMemOperand(MMO);
8379 MBB->addSuccessor(JoinMBB);
8380
8381 MI.eraseFromParent();
8382 return JoinMBB;
8383}
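// [Editorial sketch - not part of SystemZISelLowering.cpp] When no STOC form is
// usable, the expansion above is just a branch around a plain store.  Minimal
// scalar shape with hypothetical names; StoreCond is the condition after Invert
// has been applied:
static inline void condStoreSketch(bool StoreCond, int Val, int *Ptr) {
  if (StoreCond) // The emitted BRC uses the inverted CCMask to jump past
    *Ptr = Val;  // the store; FalseMBB holds the store itself.
}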
8384
8385// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
8386MachineBasicBlock *
8387SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
8388 MachineBasicBlock *MBB,
8389 bool Unsigned) const {
8390 MachineFunction &MF = *MBB->getParent();
8391 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8392 MachineRegisterInfo &MRI = MF.getRegInfo();
8393
8394 // Synthetic instruction to compare 128-bit values.
8395 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
8396 Register Op0 = MI.getOperand(0).getReg();
8397 Register Op1 = MI.getOperand(1).getReg();
8398
8399 MachineBasicBlock *StartMBB = MBB;
8400 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
8401 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
8402
8403 // StartMBB:
8404 //
8405 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
8406 // Swap the inputs to get:
8407 // CC 1 if high(Op0) > high(Op1)
8408 // CC 2 if high(Op0) < high(Op1)
8409 // CC 0 if high(Op0) == high(Op1)
8410 //
8411 // If CC != 0, we're done, so jump over the next instruction.
8412 //
8413 // VEC[L]G Op1, Op0
8414 // JNE JoinMBB
8415 // # fallthrough to HiEqMBB
8416 MBB = StartMBB;
8417 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
8418 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
8419 .addReg(Op1).addReg(Op0);
8420 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8421 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
8422 MBB->addSuccessor(JoinMBB);
8423 MBB->addSuccessor(HiEqMBB);
8424
8425 // HiEqMBB:
8426 //
8427 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
8428 // Since we already know the high parts are equal, the CC
8429 // result will only depend on the low parts:
8430 // CC 1 if low(Op0) > low(Op1)
8431 // CC 3 if low(Op0) <= low(Op1)
8432 //
8433 // VCHLGS Tmp, Op0, Op1
8434 // # fallthrough to JoinMBB
8435 MBB = HiEqMBB;
8436 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
8437 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
8438 .addReg(Op0).addReg(Op1);
8439 MBB->addSuccessor(JoinMBB);
8440
8441 // Mark CC as live-in to JoinMBB.
8442 JoinMBB->addLiveIn(SystemZ::CC);
8443
8444 MI.eraseFromParent();
8445 return JoinMBB;
8446}
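// [Editorial sketch - not part of SystemZISelLowering.cpp] Scalar model of the
// two-step 128-bit compare built above, unsigned case only (the signed variant
// differs only in how the high halves are compared):
static inline bool ugt128Sketch(unsigned long long Hi0, unsigned long long Lo0,
                                unsigned long long Hi1, unsigned long long Lo1) {
  if (Hi0 != Hi1)   // VEC[L]G: CC != 0, the high parts already decide.
    return Hi0 > Hi1;
  return Lo0 > Lo1; // VCHLGS: with equal high parts, only the low parts matter.
}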
8447
8448// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
8449// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
8450// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
8451// whether the field should be inverted after performing BinOpcode (e.g. for
8452// NAND).
8453MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
8454 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
8455 bool Invert) const {
8456 MachineFunction &MF = *MBB->getParent();
8457 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8458 MachineRegisterInfo &MRI = MF.getRegInfo();
8459
8460 // Extract the operands. Base can be a register or a frame index.
8461 // Src2 can be a register or immediate.
8462 Register Dest = MI.getOperand(0).getReg();
8463 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8464 int64_t Disp = MI.getOperand(2).getImm();
8465 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
8466 Register BitShift = MI.getOperand(4).getReg();
8467 Register NegBitShift = MI.getOperand(5).getReg();
8468 unsigned BitSize = MI.getOperand(6).getImm();
8469 DebugLoc DL = MI.getDebugLoc();
8470
8471 // Get the right opcodes for the displacement.
8472 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8473 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8474 assert(LOpcode && CSOpcode && "Displacement out of range");
8475
8476 // Create virtual registers for temporary results.
8477 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8478 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8479 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8480 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8481 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8482
8483 // Insert a basic block for the main loop.
8484 MachineBasicBlock *StartMBB = MBB;
8485 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8486 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8487
8488 // StartMBB:
8489 // ...
8490 // %OrigVal = L Disp(%Base)
8491 // # fall through to LoopMBB
8492 MBB = StartMBB;
8493 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8494 MBB->addSuccessor(LoopMBB);
8495
8496 // LoopMBB:
8497 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
8498 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8499 // %RotatedNewVal = OP %RotatedOldVal, %Src2
8500 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8501 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8502 // JNE LoopMBB
8503 // # fall through to DoneMBB
8504 MBB = LoopMBB;
8505 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8506 .addReg(OrigVal).addMBB(StartMBB)
8507 .addReg(Dest).addMBB(LoopMBB);
8508 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8509 .addReg(OldVal).addReg(BitShift).addImm(0);
8510 if (Invert) {
8511 // Perform the operation normally and then invert every bit of the field.
8512 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8513 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
8514 // XILF with the upper BitSize bits set.
8515 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
8516 .addReg(Tmp).addImm(-1U << (32 - BitSize));
8517 } else if (BinOpcode)
8518 // A simple binary operation.
8519 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
8520 .addReg(RotatedOldVal)
8521 .add(Src2);
8522 else
8523 // Use RISBG to rotate Src2 into position and use it to replace the
8524 // field in RotatedOldVal.
8525 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
8526 .addReg(RotatedOldVal).addReg(Src2.getReg())
8527 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
8528 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8529 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8530 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8531 .addReg(OldVal)
8532 .addReg(NewVal)
8533 .add(Base)
8534 .addImm(Disp);
8535 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8536 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
8537 MBB->addSuccessor(LoopMBB);
8538 MBB->addSuccessor(DoneMBB);
8539
8540 MI.eraseFromParent();
8541 return DoneMBB;
8542}
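// [Editorial sketch - not part of SystemZISelLowering.cpp] Host-side model of
// the rotate / operate / rotate-back scheme above, shown for an OR and assuming
// Src2 has already been positioned so it lines up with the field once the
// containing word is rotated left by BitShift:
static inline unsigned rotl32Sketch(unsigned V, unsigned N) {
  N &= 31;
  return N ? (V << N) | (V >> (32 - N)) : V;
}
static inline unsigned atomicOrwSketch(unsigned OldVal, unsigned Src2,
                                       unsigned BitShift) {
  unsigned RotatedOld = rotl32Sketch(OldVal, BitShift);  // RLL ...(%BitShift)
  unsigned RotatedNew = RotatedOld | Src2;               // the BinOpcode step
  return rotl32Sketch(RotatedNew, (32 - BitShift) & 31); // RLL ...(%NegBitShift)
}
// The CS in the loop then publishes the new word only if memory still holds OldVal.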
8543
8544// Implement EmitInstrWithCustomInserter for subword pseudo
8545// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
8546// instruction that should be used to compare the current field with the
8547// minimum or maximum value. KeepOldMask is the BRC condition-code mask
8548// for when the current field should be kept.
8549MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
8550 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
8551 unsigned KeepOldMask) const {
8552 MachineFunction &MF = *MBB->getParent();
8553 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8554 MachineRegisterInfo &MRI = MF.getRegInfo();
8555
8556 // Extract the operands. Base can be a register or a frame index.
8557 Register Dest = MI.getOperand(0).getReg();
8558 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8559 int64_t Disp = MI.getOperand(2).getImm();
8560 Register Src2 = MI.getOperand(3).getReg();
8561 Register BitShift = MI.getOperand(4).getReg();
8562 Register NegBitShift = MI.getOperand(5).getReg();
8563 unsigned BitSize = MI.getOperand(6).getImm();
8564 DebugLoc DL = MI.getDebugLoc();
8565
8566 // Get the right opcodes for the displacement.
8567 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8568 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8569 assert(LOpcode && CSOpcode && "Displacement out of range");
8570
8571 // Create virtual registers for temporary results.
8572 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8573 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8574 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8575 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8576 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8577 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8578
8579 // Insert 3 basic blocks for the loop.
8580 MachineBasicBlock *StartMBB = MBB;
8581 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8582 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8583 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
8584 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
8585
8586 // StartMBB:
8587 // ...
8588 // %OrigVal = L Disp(%Base)
8589 // # fall through to LoopMBB
8590 MBB = StartMBB;
8591 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8592 MBB->addSuccessor(LoopMBB);
8593
8594 // LoopMBB:
8595 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
8596 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8597 // CompareOpcode %RotatedOldVal, %Src2
8598 // BRC KeepOldMask, UpdateMBB
8599 MBB = LoopMBB;
8600 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8601 .addReg(OrigVal).addMBB(StartMBB)
8602 .addReg(Dest).addMBB(UpdateMBB);
8603 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8604 .addReg(OldVal).addReg(BitShift).addImm(0);
8605 BuildMI(MBB, DL, TII->get(CompareOpcode))
8606 .addReg(RotatedOldVal).addReg(Src2);
8607 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8608 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
8609 MBB->addSuccessor(UpdateMBB);
8610 MBB->addSuccessor(UseAltMBB);
8611
8612 // UseAltMBB:
8613 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
8614 // # fall through to UpdateMBB
8615 MBB = UseAltMBB;
8616 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
8617 .addReg(RotatedOldVal).addReg(Src2)
8618 .addImm(32).addImm(31 + BitSize).addImm(0);
8619 MBB->addSuccessor(UpdateMBB);
8620
8621 // UpdateMBB:
8622 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
8623 // [ %RotatedAltVal, UseAltMBB ]
8624 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8625 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8626 // JNE LoopMBB
8627 // # fall through to DoneMBB
8628 MBB = UpdateMBB;
8629 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
8630 .addReg(RotatedOldVal).addMBB(LoopMBB)
8631 .addReg(RotatedAltVal).addMBB(UseAltMBB);
8632 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8633 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8634 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8635 .addReg(OldVal)
8636 .addReg(NewVal)
8637 .add(Base)
8638 .addImm(Disp);
8639 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8640 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
8641 MBB->addSuccessor(LoopMBB);
8642 MBB->addSuccessor(DoneMBB);
8643
8644 MI.eraseFromParent();
8645 return DoneMBB;
8646}
8647
8648// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
8649// instruction MI.
8650MachineBasicBlock *
8651SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
8652 MachineBasicBlock *MBB) const {
8653 MachineFunction &MF = *MBB->getParent();
8654 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8655 MachineRegisterInfo &MRI = MF.getRegInfo();
8656
8657 // Extract the operands. Base can be a register or a frame index.
8658 Register Dest = MI.getOperand(0).getReg();
8659 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8660 int64_t Disp = MI.getOperand(2).getImm();
8661 Register CmpVal = MI.getOperand(3).getReg();
8662 Register OrigSwapVal = MI.getOperand(4).getReg();
8663 Register BitShift = MI.getOperand(5).getReg();
8664 Register NegBitShift = MI.getOperand(6).getReg();
8665 int64_t BitSize = MI.getOperand(7).getImm();
8666 DebugLoc DL = MI.getDebugLoc();
8667
8668 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
8669
8670 // Get the right opcodes for the displacement and zero-extension.
8671 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8672 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8673 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
8674 assert(LOpcode && CSOpcode && "Displacement out of range");
8675
8676 // Create virtual registers for temporary results.
8677 Register OrigOldVal = MRI.createVirtualRegister(RC);
8678 Register OldVal = MRI.createVirtualRegister(RC);
8679 Register SwapVal = MRI.createVirtualRegister(RC);
8680 Register StoreVal = MRI.createVirtualRegister(RC);
8681 Register OldValRot = MRI.createVirtualRegister(RC);
8682 Register RetryOldVal = MRI.createVirtualRegister(RC);
8683 Register RetrySwapVal = MRI.createVirtualRegister(RC);
8684
8685 // Insert 2 basic blocks for the loop.
8686 MachineBasicBlock *StartMBB = MBB;
8687 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8688 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8689 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
8690
8691 // StartMBB:
8692 // ...
8693 // %OrigOldVal = L Disp(%Base)
8694 // # fall through to LoopMBB
8695 MBB = StartMBB;
8696 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
8697 .add(Base)
8698 .addImm(Disp)
8699 .addReg(0);
8700 MBB->addSuccessor(LoopMBB);
8701
8702 // LoopMBB:
8703 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
8704 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
8705 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
8706 // ^^ The low BitSize bits contain the field
8707 // of interest.
8708 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
8709 // ^^ Replace the upper 32-BitSize bits of the
8710 // swap value with those that we loaded and rotated.
8711 // %Dest = LL[CH] %OldValRot
8712 // CR %Dest, %CmpVal
8713 // JNE DoneMBB
8714 // # Fall through to SetMBB
8715 MBB = LoopMBB;
8716 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8717 .addReg(OrigOldVal).addMBB(StartMBB)
8718 .addReg(RetryOldVal).addMBB(SetMBB);
8719 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
8720 .addReg(OrigSwapVal).addMBB(StartMBB)
8721 .addReg(RetrySwapVal).addMBB(SetMBB);
8722 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
8723 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
8724 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
8725 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
8726 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
8727 .addReg(OldValRot);
8728 BuildMI(MBB, DL, TII->get(SystemZ::CR))
8729 .addReg(Dest).addReg(CmpVal);
8730 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8733 MBB->addSuccessor(DoneMBB);
8734 MBB->addSuccessor(SetMBB);
8735
8736 // SetMBB:
8737 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
8738 // ^^ Rotate the new field to its proper position.
8739 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
8740 // JNE LoopMBB
8741 // # fall through to ExitMBB
8742 MBB = SetMBB;
8743 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
8744 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
8745 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
8746 .addReg(OldVal)
8747 .addReg(StoreVal)
8748 .add(Base)
8749 .addImm(Disp);
8750 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8751 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
8752 MBB->addSuccessor(LoopMBB);
8753 MBB->addSuccessor(DoneMBB);
8754
8755 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
8756 // to the block after the loop. At this point, CC may have been defined
8757 // either by the CR in LoopMBB or by the CS in SetMBB.
8758 if (!MI.registerDefIsDead(SystemZ::CC))
8759 DoneMBB->addLiveIn(SystemZ::CC);
8760
8761 MI.eraseFromParent();
8762 return DoneMBB;
8763}
8764
8765// Emit a move from two GR64s to a GR128.
8766MachineBasicBlock *
8767SystemZTargetLowering::emitPair128(MachineInstr &MI,
8768 MachineBasicBlock *MBB) const {
8769 MachineFunction &MF = *MBB->getParent();
8770 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8771 MachineRegisterInfo &MRI = MF.getRegInfo();
8772 DebugLoc DL = MI.getDebugLoc();
8773
8774 Register Dest = MI.getOperand(0).getReg();
8775 Register Hi = MI.getOperand(1).getReg();
8776 Register Lo = MI.getOperand(2).getReg();
8777 Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8778 Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8779
8780 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1);
8781 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
8782 .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64);
8783 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8784 .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64);
8785
8786 MI.eraseFromParent();
8787 return MBB;
8788}
8789
8790// Emit an extension from a GR64 to a GR128. ClearEven is true
8791// if the high register of the GR128 value must be cleared or false if
8792// it's "don't care".
8793MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
8794 MachineBasicBlock *MBB,
8795 bool ClearEven) const {
8796 MachineFunction &MF = *MBB->getParent();
8797 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8798 MachineRegisterInfo &MRI = MF.getRegInfo();
8799 DebugLoc DL = MI.getDebugLoc();
8800
8801 Register Dest = MI.getOperand(0).getReg();
8802 Register Src = MI.getOperand(1).getReg();
8803 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8804
8805 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
8806 if (ClearEven) {
8807 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8808 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8809
8810 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
8811 .addImm(0);
8812 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
8813 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
8814 In128 = NewIn128;
8815 }
8816 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8817 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
8818
8819 MI.eraseFromParent();
8820 return MBB;
8821}
8822
8823MachineBasicBlock *
8824SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
8825 MachineBasicBlock *MBB,
8826 unsigned Opcode, bool IsMemset) const {
8827 MachineFunction &MF = *MBB->getParent();
8828 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8829 MachineRegisterInfo &MRI = MF.getRegInfo();
8830 DebugLoc DL = MI.getDebugLoc();
8831
8832 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
8833 uint64_t DestDisp = MI.getOperand(1).getImm();
8834 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
8835 uint64_t SrcDisp;
8836
8837 // Fold the displacement Disp if it is out of range.
8838 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
8839 if (!isUInt<12>(Disp)) {
8840 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8841 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
8842 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
8843 .add(Base).addImm(Disp).addReg(0);
8844 Base = MachineOperand::CreateReg(Reg, false);
8845 Disp = 0;
8846 }
8847 };
8848
8849 if (!IsMemset) {
8850 SrcBase = earlyUseOperand(MI.getOperand(2));
8851 SrcDisp = MI.getOperand(3).getImm();
8852 } else {
8853 SrcBase = DestBase;
8854 SrcDisp = DestDisp++;
8855 foldDisplIfNeeded(DestBase, DestDisp);
8856 }
8857
8858 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
8859 bool IsImmForm = LengthMO.isImm();
8860 bool IsRegForm = !IsImmForm;
8861
8862 // Build and insert one Opcode of Length, with special treatment for memset.
8863 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
8864 MachineBasicBlock::iterator InsPos,
8865 MachineOperand DBase, uint64_t DDisp,
8866 MachineOperand SBase, uint64_t SDisp,
8867 unsigned Length) -> void {
8868 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
8869 if (IsMemset) {
8870 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
8871 if (ByteMO.isImm())
8872 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
8873 .add(SBase).addImm(SDisp).add(ByteMO);
8874 else
8875 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
8876 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
8877 if (--Length == 0)
8878 return;
8879 }
8880 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
8881 .add(DBase).addImm(DDisp).addImm(Length)
8882 .add(SBase).addImm(SDisp)
8883 .setMemRefs(MI.memoperands());
8884 };
8885
8886 bool NeedsLoop = false;
8887 uint64_t ImmLength = 0;
8888 Register LenAdjReg = SystemZ::NoRegister;
8889 if (IsImmForm) {
8890 ImmLength = LengthMO.getImm();
8891 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
8892 if (ImmLength == 0) {
8893 MI.eraseFromParent();
8894 return MBB;
8895 }
8896 if (Opcode == SystemZ::CLC) {
8897 if (ImmLength > 3 * 256)
8898 // A two-CLC sequence is a clear win over a loop, not least because
8899 // it needs only one branch. A three-CLC sequence needs the same
8900 // number of branches as a loop (i.e. 2), but is shorter. That
8901 // brings us to lengths greater than 768 bytes. It seems relatively
8902 // likely that a difference will be found within the first 768 bytes,
8903 // so we just optimize for the smallest number of branch
8904 // instructions, in order to avoid polluting the prediction buffer
8905 // too much.
8906 NeedsLoop = true;
8907 } else if (ImmLength > 6 * 256)
8908 // The heuristic we use is to prefer loops for anything that would
8909 // require 7 or more MVCs. With these kinds of sizes there isn't much
8910 // to choose between straight-line code and looping code, since the
8911 // time will be dominated by the MVCs themselves.
8912 NeedsLoop = true;
8913 } else {
8914 NeedsLoop = true;
8915 LenAdjReg = LengthMO.getReg();
8916 }
8917
8918 // When generating more than one CLC, all but the last will need to
8919 // branch to the end when a difference is found.
8920 MachineBasicBlock *EndMBB =
8921 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
8922 ? SystemZ::splitBlockAfter(MI, MBB)
8923 : nullptr);
8924
8925 if (NeedsLoop) {
8926 Register StartCountReg =
8927 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8928 if (IsImmForm) {
8929 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
8930 ImmLength &= 255;
8931 } else {
8932 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
8933 .addReg(LenAdjReg)
8934 .addReg(0)
8935 .addImm(8);
8936 }
8937
8938 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
8939 auto loadZeroAddress = [&]() -> MachineOperand {
8940 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8941 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
8942 return MachineOperand::CreateReg(Reg, false);
8943 };
8944 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
8945 DestBase = loadZeroAddress();
8946 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
8947 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
8948
8949 MachineBasicBlock *StartMBB = nullptr;
8950 MachineBasicBlock *LoopMBB = nullptr;
8951 MachineBasicBlock *NextMBB = nullptr;
8952 MachineBasicBlock *DoneMBB = nullptr;
8953 MachineBasicBlock *AllDoneMBB = nullptr;
8954
8955 Register StartSrcReg = forceReg(MI, SrcBase, TII);
8956 Register StartDestReg =
8957 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
8958
8959 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
8960 Register ThisSrcReg = MRI.createVirtualRegister(RC);
8961 Register ThisDestReg =
8962 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
8963 Register NextSrcReg = MRI.createVirtualRegister(RC);
8964 Register NextDestReg =
8965 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
8966 RC = &SystemZ::GR64BitRegClass;
8967 Register ThisCountReg = MRI.createVirtualRegister(RC);
8968 Register NextCountReg = MRI.createVirtualRegister(RC);
8969
8970 if (IsRegForm) {
8971 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8972 StartMBB = SystemZ::emitBlockAfter(MBB);
8973 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8974 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
8975 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
8976
8977 // MBB:
8978 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
8979 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8980 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
8981 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8983 .addMBB(AllDoneMBB);
8984 MBB->addSuccessor(AllDoneMBB);
8985 if (!IsMemset)
8986 MBB->addSuccessor(StartMBB);
8987 else {
8988 // MemsetOneCheckMBB:
8989 // # Jump to MemsetOneMBB for a memset of length 1, or
8990 // # fall thru to StartMBB.
8991 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
8992 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
8993 MBB->addSuccessor(MemsetOneCheckMBB);
8994 MBB = MemsetOneCheckMBB;
8995 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8996 .addReg(LenAdjReg).addImm(-1);
8997 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8999 .addMBB(MemsetOneMBB);
9000 MBB->addSuccessor(MemsetOneMBB, {10, 100});
9001 MBB->addSuccessor(StartMBB, {90, 100});
9002
9003 // MemsetOneMBB:
9004 // # Jump back to AllDoneMBB after a single MVI or STC.
9005 MBB = MemsetOneMBB;
9006 insertMemMemOp(MBB, MBB->end(),
9007 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
9008 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
9009 1);
9010 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
9011 MBB->addSuccessor(AllDoneMBB);
9012 }
9013
9014 // StartMBB:
9015 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
9016 MBB = StartMBB;
9017 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9018 .addReg(StartCountReg).addImm(0);
9019 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9021 .addMBB(DoneMBB);
9022 MBB->addSuccessor(DoneMBB);
9023 MBB->addSuccessor(LoopMBB);
9024 }
9025 else {
9026 StartMBB = MBB;
9027 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9028 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9029 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9030
9031 // StartMBB:
9032 // # fall through to LoopMBB
9033 MBB->addSuccessor(LoopMBB);
9034
9035 DestBase = MachineOperand::CreateReg(NextDestReg, false);
9036 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
9037 if (EndMBB && !ImmLength)
9038 // If the loop handled the whole CLC range, DoneMBB will be empty with
9039 // CC live-through into EndMBB, so add it as live-in.
9040 DoneMBB->addLiveIn(SystemZ::CC);
9041 }
9042
9043 // LoopMBB:
9044 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
9045 // [ %NextDestReg, NextMBB ]
9046 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
9047 // [ %NextSrcReg, NextMBB ]
9048 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
9049 // [ %NextCountReg, NextMBB ]
9050 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
9051 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
9052 // ( JLH EndMBB )
9053 //
9054 // The prefetch is used only for MVC. The JLH is used only for CLC.
9055 MBB = LoopMBB;
9056 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
9057 .addReg(StartDestReg).addMBB(StartMBB)
9058 .addReg(NextDestReg).addMBB(NextMBB);
9059 if (!HaveSingleBase)
9060 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
9061 .addReg(StartSrcReg).addMBB(StartMBB)
9062 .addReg(NextSrcReg).addMBB(NextMBB);
9063 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
9064 .addReg(StartCountReg).addMBB(StartMBB)
9065 .addReg(NextCountReg).addMBB(NextMBB);
9066 if (Opcode == SystemZ::MVC)
9067 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
9069 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
9070 insertMemMemOp(MBB, MBB->end(),
9071 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
9072 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
9073 if (EndMBB) {
9074 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9076 .addMBB(EndMBB);
9077 MBB->addSuccessor(EndMBB);
9078 MBB->addSuccessor(NextMBB);
9079 }
9080
9081 // NextMBB:
9082 // %NextDestReg = LA 256(%ThisDestReg)
9083 // %NextSrcReg = LA 256(%ThisSrcReg)
9084 // %NextCountReg = AGHI %ThisCountReg, -1
9085 // CGHI %NextCountReg, 0
9086 // JLH LoopMBB
9087 // # fall through to DoneMBB
9088 //
9089 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
9090 MBB = NextMBB;
9091 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
9092 .addReg(ThisDestReg).addImm(256).addReg(0);
9093 if (!HaveSingleBase)
9094 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
9095 .addReg(ThisSrcReg).addImm(256).addReg(0);
9096 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
9097 .addReg(ThisCountReg).addImm(-1);
9098 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9099 .addReg(NextCountReg).addImm(0);
9100 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9102 .addMBB(LoopMBB);
9103 MBB->addSuccessor(LoopMBB);
9104 MBB->addSuccessor(DoneMBB);
9105
9106 MBB = DoneMBB;
9107 if (IsRegForm) {
9108 // DoneMBB:
9109 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
9110 // # Use EXecute Relative Long for the remainder of the bytes. The target
9111 // instruction of the EXRL will have a length field of 1 since 0 is an
9112 // illegal value. The number of bytes processed becomes (%LenAdjReg &
9113 // 0xff) + 1.
9114 // # Fall through to AllDoneMBB.
9115 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9116 Register RemDestReg = HaveSingleBase ? RemSrcReg
9117 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9118 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
9119 .addReg(StartDestReg).addMBB(StartMBB)
9120 .addReg(NextDestReg).addMBB(NextMBB);
9121 if (!HaveSingleBase)
9122 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
9123 .addReg(StartSrcReg).addMBB(StartMBB)
9124 .addReg(NextSrcReg).addMBB(NextMBB);
9125 if (IsMemset)
9126 insertMemMemOp(MBB, MBB->end(),
9127 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
9128 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
9129 MachineInstrBuilder EXRL_MIB =
9130 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
9131 .addImm(Opcode)
9132 .addReg(LenAdjReg)
9133 .addReg(RemDestReg).addImm(DestDisp)
9134 .addReg(RemSrcReg).addImm(SrcDisp);
9135 MBB->addSuccessor(AllDoneMBB);
9136 MBB = AllDoneMBB;
9137 if (Opcode != SystemZ::MVC) {
9138 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
9139 if (EndMBB)
9140 MBB->addLiveIn(SystemZ::CC);
9141 }
9142 }
9144 }
9145
9146 // Handle any remaining bytes with straight-line code.
9147 while (ImmLength > 0) {
9148 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
9149 // The previous iteration might have created out-of-range displacements.
9150 // Apply them using LA/LAY if so.
9151 foldDisplIfNeeded(DestBase, DestDisp);
9152 foldDisplIfNeeded(SrcBase, SrcDisp);
9153 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
9154 DestDisp += ThisLength;
9155 SrcDisp += ThisLength;
9156 ImmLength -= ThisLength;
9157 // If there's another CLC to go, branch to the end if a difference
9158 // was found.
9159 if (EndMBB && ImmLength > 0) {
9160 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
9161 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9163 .addMBB(EndMBB);
9164 MBB->addSuccessor(EndMBB);
9165 MBB->addSuccessor(NextMBB);
9166 MBB = NextMBB;
9167 }
9168 }
9169 if (EndMBB) {
9170 MBB->addSuccessor(EndMBB);
9171 MBB = EndMBB;
9172 MBB->addLiveIn(SystemZ::CC);
9173 }
9174
9175 MI.eraseFromParent();
9176 return MBB;
9177}
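// [Editorial sketch - not part of SystemZISelLowering.cpp] The straight-line
// vs. loop decision made above for the immediate-length form, distilled into a
// tiny helper with hypothetical names:
enum class MemMemStrategySketch { StraightLine, Loop };
static inline MemMemStrategySketch
chooseMemMemStrategySketch(bool IsCLC, unsigned long long Length) {
  if (IsCLC)                       // Up to three CLCs (768 bytes) beat a loop.
    return Length > 3 * 256 ? MemMemStrategySketch::Loop
                            : MemMemStrategySketch::StraightLine;
  return Length > 6 * 256 ? MemMemStrategySketch::Loop // 7+ MVCs: use the loop.
                          : MemMemStrategySketch::StraightLine;
}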
9178
9179// Decompose string pseudo-instruction MI into a loop that continually performs
9180// Opcode until CC != 3.
9181MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
9182 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9183 MachineFunction &MF = *MBB->getParent();
9184 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9185 MachineRegisterInfo &MRI = MF.getRegInfo();
9186 DebugLoc DL = MI.getDebugLoc();
9187
9188 uint64_t End1Reg = MI.getOperand(0).getReg();
9189 uint64_t Start1Reg = MI.getOperand(1).getReg();
9190 uint64_t Start2Reg = MI.getOperand(2).getReg();
9191 uint64_t CharReg = MI.getOperand(3).getReg();
9192
9193 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
9194 uint64_t This1Reg = MRI.createVirtualRegister(RC);
9195 uint64_t This2Reg = MRI.createVirtualRegister(RC);
9196 uint64_t End2Reg = MRI.createVirtualRegister(RC);
9197
9198 MachineBasicBlock *StartMBB = MBB;
9199 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9200 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9201
9202 // StartMBB:
9203 // # fall through to LoopMBB
9204 MBB->addSuccessor(LoopMBB);
9205
9206 // LoopMBB:
9207 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
9208 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
9209 // R0L = %CharReg
9210 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
9211 // JO LoopMBB
9212 // # fall through to DoneMBB
9213 //
9214 // The load of R0L can be hoisted by post-RA LICM.
9215 MBB = LoopMBB;
9216
9217 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
9218 .addReg(Start1Reg).addMBB(StartMBB)
9219 .addReg(End1Reg).addMBB(LoopMBB);
9220 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
9221 .addReg(Start2Reg).addMBB(StartMBB)
9222 .addReg(End2Reg).addMBB(LoopMBB);
9223 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
9224 BuildMI(MBB, DL, TII->get(Opcode))
9225 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
9226 .addReg(This1Reg).addReg(This2Reg);
9227 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9228 .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
9229 MBB->addSuccessor(LoopMBB);
9230 MBB->addSuccessor(DoneMBB);
9231
9232 DoneMBB->addLiveIn(SystemZ::CC);
9233
9234 MI.eraseFromParent();
9235 return DoneMBB;
9236}
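// [Editorial sketch - not part of SystemZISelLowering.cpp] Shape of the
// expansion above in plain C++: the string instruction may stop early with
// CC 3, so it is simply re-issued until CC != 3.  runStringOp is a stand-in,
// not a real API:
static inline int stringWrapperSketch(int (*runStringOp)()) {
  int CC;
  do
    CC = runStringOp(); // CLST/MVST/SRST processes a CPU-determined chunk.
  while (CC == 3);      // "JO LoopMBB" in the generated code.
  return CC;
}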
9237
9238// Update TBEGIN instruction with final opcode and register clobbers.
9239MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
9240 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
9241 bool NoFloat) const {
9242 MachineFunction &MF = *MBB->getParent();
9243 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9244 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9245
9246 // Update opcode.
9247 MI.setDesc(TII->get(Opcode));
9248
9249 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
9250 // Make sure to add the corresponding GRSM bits if they are missing.
9251 uint64_t Control = MI.getOperand(2).getImm();
9252 static const unsigned GPRControlBit[16] = {
9253 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
9254 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
9255 };
9256 Control |= GPRControlBit[15];
9257 if (TFI->hasFP(MF))
9258 Control |= GPRControlBit[11];
9259 MI.getOperand(2).setImm(Control);
9260
9261 // Add GPR clobbers.
9262 for (int I = 0; I < 16; I++) {
9263 if ((Control & GPRControlBit[I]) == 0) {
9264 unsigned Reg = SystemZMC::GR64Regs[I];
9265 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9266 }
9267 }
9268
9269 // Add FPR/VR clobbers.
9270 if (!NoFloat && (Control & 4) != 0) {
9271 if (Subtarget.hasVector()) {
9272 for (unsigned Reg : SystemZMC::VR128Regs) {
9273 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9274 }
9275 } else {
9276 for (unsigned Reg : SystemZMC::FP64Regs) {
9277 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9278 }
9279 }
9280 }
9281
9282 return MBB;
9283}
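// [Editorial sketch - not part of SystemZISelLowering.cpp] How the GRSM bits
// above map to clobbers: each bit of the high halfword covers an even/odd GPR
// pair, so a GPR must be modelled as clobbered exactly when its pair's bit is
// clear in the control field:
static inline bool isGPRClobberedByTBeginSketch(unsigned I,
                                                unsigned long long Control) {
  static const unsigned GPRControlBitSketch[16] = {
      0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
      0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100};
  return (Control & GPRControlBitSketch[I]) == 0;
}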
9284
9285MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
9286 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9287 MachineFunction &MF = *MBB->getParent();
9288 MachineRegisterInfo *MRI = &MF.getRegInfo();
9289 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9290 DebugLoc DL = MI.getDebugLoc();
9291
9292 Register SrcReg = MI.getOperand(0).getReg();
9293
9294 // Create new virtual register of the same class as source.
9295 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
9296 Register DstReg = MRI->createVirtualRegister(RC);
9297
9298 // Replace pseudo with a normal load-and-test that models the def as
9299 // well.
9300 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
9301 .addReg(SrcReg)
9302 .setMIFlags(MI.getFlags());
9303 MI.eraseFromParent();
9304
9305 return MBB;
9306}
9307
9308MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
9309 MachineInstr &MI, MachineBasicBlock *MBB) const {
9310 MachineFunction &MF = *MBB->getParent();
9311 MachineRegisterInfo *MRI = &MF.getRegInfo();
9312 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9313 DebugLoc DL = MI.getDebugLoc();
9314 const unsigned ProbeSize = getStackProbeSize(MF);
9315 Register DstReg = MI.getOperand(0).getReg();
9316 Register SizeReg = MI.getOperand(2).getReg();
9317
9318 MachineBasicBlock *StartMBB = MBB;
9319 MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
9320 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
9321 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
9322 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
9323 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
9324
9327
9328 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9329 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9330
9331 // LoopTestMBB
9332 // BRC TailTestMBB
9333 // # fallthrough to LoopBodyMBB
9334 StartMBB->addSuccessor(LoopTestMBB);
9335 MBB = LoopTestMBB;
9336 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
9337 .addReg(SizeReg)
9338 .addMBB(StartMBB)
9339 .addReg(IncReg)
9340 .addMBB(LoopBodyMBB);
9341 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
9342 .addReg(PHIReg)
9343 .addImm(ProbeSize);
9344 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9346 .addMBB(TailTestMBB);
9347 MBB->addSuccessor(LoopBodyMBB);
9348 MBB->addSuccessor(TailTestMBB);
9349
9350 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
9351 // J LoopTestMBB
9352 MBB = LoopBodyMBB;
9353 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
9354 .addReg(PHIReg)
9355 .addImm(ProbeSize);
9356 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
9357 .addReg(SystemZ::R15D)
9358 .addImm(ProbeSize);
9359 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9360 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
9361 .setMemRefs(VolLdMMO);
9362 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
9363 MBB->addSuccessor(LoopTestMBB);
9364
9365 // TailTestMBB
9366 // BRC DoneMBB
9367 // # fallthrough to TailMBB
9368 MBB = TailTestMBB;
9369 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9370 .addReg(PHIReg)
9371 .addImm(0);
9372 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9374 .addMBB(DoneMBB);
9375 MBB->addSuccessor(TailMBB);
9376 MBB->addSuccessor(DoneMBB);
9377
9378 // TailMBB
9379 // # fallthrough to DoneMBB
9380 MBB = TailMBB;
9381 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
9382 .addReg(SystemZ::R15D)
9383 .addReg(PHIReg);
9384 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9385 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
9386 .setMemRefs(VolLdMMO);
9387 MBB->addSuccessor(DoneMBB);
9388
9389 // DoneMBB
9390 MBB = DoneMBB;
9391 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
9392 .addReg(SystemZ::R15D);
9393
9394 MI.eraseFromParent();
9395 return DoneMBB;
9396}
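// [Editorial sketch - not part of SystemZISelLowering.cpp] Rough scalar model
// of the probing expansion above: allocate in ProbeSize chunks, touching each
// chunk as it is allocated, then allocate and touch the remainder.  probe() is
// a hypothetical stand-in for the volatile compare against the new stack area:
static inline unsigned long long
probedAllocaSketch(unsigned long long SP, unsigned long long Size,
                   unsigned long long ProbeSize,
                   void (*probe)(unsigned long long Addr)) {
  unsigned long long Remaining = Size;
  while (Remaining >= ProbeSize) { // LoopTestMBB
    Remaining -= ProbeSize;        // LoopBodyMBB: SLGFI on the counter...
    SP -= ProbeSize;               // ...and on %r15,
    probe(SP + ProbeSize - 8);     // CG ProbeSize-8(%r15): touch the new chunk.
  }
  if (Remaining) {                 // TailTestMBB / TailMBB
    SP -= Remaining;
    probe(SP + Remaining - 8);     // CG -8(%PHIReg,%r15).
  }
  return SP;                       // DoneMBB: the result is the new %r15.
}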
9397
9398SDValue SystemZTargetLowering::
9399getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
9400 MachineFunction &MF = DAG.getMachineFunction();
9401 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
9402 SDLoc DL(SP);
9403 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
9404 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
9405}
9406
9407MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
9408 MachineInstr &MI, MachineBasicBlock *MBB) const {
9409 switch (MI.getOpcode()) {
9410 case SystemZ::ADJCALLSTACKDOWN:
9411 case SystemZ::ADJCALLSTACKUP:
9412 return emitAdjCallStack(MI, MBB);
9413
9414 case SystemZ::Select32:
9415 case SystemZ::Select64:
9416 case SystemZ::Select128:
9417 case SystemZ::SelectF32:
9418 case SystemZ::SelectF64:
9419 case SystemZ::SelectF128:
9420 case SystemZ::SelectVR32:
9421 case SystemZ::SelectVR64:
9422 case SystemZ::SelectVR128:
9423 return emitSelect(MI, MBB);
9424
9425 case SystemZ::CondStore8Mux:
9426 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
9427 case SystemZ::CondStore8MuxInv:
9428 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
9429 case SystemZ::CondStore16Mux:
9430 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
9431 case SystemZ::CondStore16MuxInv:
9432 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
9433 case SystemZ::CondStore32Mux:
9434 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
9435 case SystemZ::CondStore32MuxInv:
9436 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
9437 case SystemZ::CondStore8:
9438 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
9439 case SystemZ::CondStore8Inv:
9440 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
9441 case SystemZ::CondStore16:
9442 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
9443 case SystemZ::CondStore16Inv:
9444 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
9445 case SystemZ::CondStore32:
9446 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
9447 case SystemZ::CondStore32Inv:
9448 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
9449 case SystemZ::CondStore64:
9450 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
9451 case SystemZ::CondStore64Inv:
9452 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
9453 case SystemZ::CondStoreF32:
9454 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
9455 case SystemZ::CondStoreF32Inv:
9456 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
9457 case SystemZ::CondStoreF64:
9458 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
9459 case SystemZ::CondStoreF64Inv:
9460 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
9461
9462 case SystemZ::SCmp128Hi:
9463 return emitICmp128Hi(MI, MBB, false);
9464 case SystemZ::UCmp128Hi:
9465 return emitICmp128Hi(MI, MBB, true);
9466
9467 case SystemZ::PAIR128:
9468 return emitPair128(MI, MBB);
9469 case SystemZ::AEXT128:
9470 return emitExt128(MI, MBB, false);
9471 case SystemZ::ZEXT128:
9472 return emitExt128(MI, MBB, true);
9473
9474 case SystemZ::ATOMIC_SWAPW:
9475 return emitAtomicLoadBinary(MI, MBB, 0);
9476
9477 case SystemZ::ATOMIC_LOADW_AR:
9478 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
9479 case SystemZ::ATOMIC_LOADW_AFI:
9480 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
9481
9482 case SystemZ::ATOMIC_LOADW_SR:
9483 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
9484
9485 case SystemZ::ATOMIC_LOADW_NR:
9486 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
9487 case SystemZ::ATOMIC_LOADW_NILH:
9488 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
9489
9490 case SystemZ::ATOMIC_LOADW_OR:
9491 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
9492 case SystemZ::ATOMIC_LOADW_OILH:
9493 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
9494
9495 case SystemZ::ATOMIC_LOADW_XR:
9496 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
9497 case SystemZ::ATOMIC_LOADW_XILF:
9498 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
9499
9500 case SystemZ::ATOMIC_LOADW_NRi:
9501 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
9502 case SystemZ::ATOMIC_LOADW_NILHi:
9503 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
9504
9505 case SystemZ::ATOMIC_LOADW_MIN:
9506 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
9507 case SystemZ::ATOMIC_LOADW_MAX:
9508 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
9509 case SystemZ::ATOMIC_LOADW_UMIN:
9510 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
9511 case SystemZ::ATOMIC_LOADW_UMAX:
9512 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
9513
9514 case SystemZ::ATOMIC_CMP_SWAPW:
9515 return emitAtomicCmpSwapW(MI, MBB);
9516 case SystemZ::MVCImm:
9517 case SystemZ::MVCReg:
9518 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
9519 case SystemZ::NCImm:
9520 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
9521 case SystemZ::OCImm:
9522 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
9523 case SystemZ::XCImm:
9524 case SystemZ::XCReg:
9525 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
9526 case SystemZ::CLCImm:
9527 case SystemZ::CLCReg:
9528 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
9529 case SystemZ::MemsetImmImm:
9530 case SystemZ::MemsetImmReg:
9531 case SystemZ::MemsetRegImm:
9532 case SystemZ::MemsetRegReg:
9533 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
9534 case SystemZ::CLSTLoop:
9535 return emitStringWrapper(MI, MBB, SystemZ::CLST);
9536 case SystemZ::MVSTLoop:
9537 return emitStringWrapper(MI, MBB, SystemZ::MVST);
9538 case SystemZ::SRSTLoop:
9539 return emitStringWrapper(MI, MBB, SystemZ::SRST);
9540 case SystemZ::TBEGIN:
9541 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
9542 case SystemZ::TBEGIN_nofloat:
9543 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
9544 case SystemZ::TBEGINC:
9545 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
9546 case SystemZ::LTEBRCompare_Pseudo:
9547 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
9548 case SystemZ::LTDBRCompare_Pseudo:
9549 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
9550 case SystemZ::LTXBRCompare_Pseudo:
9551 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
9552
9553 case SystemZ::PROBED_ALLOCA:
9554 return emitProbedAlloca(MI, MBB);
9555
9556 case TargetOpcode::STACKMAP:
9557 case TargetOpcode::PATCHPOINT:
9558 return emitPatchPoint(MI, MBB);
9559
9560 default:
9561 llvm_unreachable("Unexpected instr type to insert");
9562 }
9563}
9564
9565// This is only used by the isel schedulers, and is needed only to prevent
9566// the compiler from crashing when list-ilp is used.
9567const TargetRegisterClass *
9568SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
9569 if (VT == MVT::Untyped)
9570 return &SystemZ::ADDR128BitRegClass;
9571 return TargetLowering::getRepRegClassFor(VT);
9572}
9573
9574SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
9575 SelectionDAG &DAG) const {
9576 SDLoc dl(Op);
9577 /*
9578 The rounding method is in FPC Byte 3 bits 6-7, and has the following
9579 settings:
9580 00 Round to nearest
9581 01 Round to 0
9582 10 Round to +inf
9583 11 Round to -inf
9584
9585 FLT_ROUNDS, on the other hand, expects the following:
9586 -1 Undefined
9587 0 Round to 0
9588 1 Round to nearest
9589 2 Round to +inf
9590 3 Round to -inf
9591 */
9592
9593 // Save FPC to register.
9594 SDValue Chain = Op.getOperand(0);
9595 SDValue EFPC(
9596 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
9597 Chain = EFPC.getValue(1);
9598
9599 // Transform as necessary
9600 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
9601 DAG.getConstant(3, dl, MVT::i32));
9602 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
9603 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
9604 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
9605 DAG.getConstant(1, dl, MVT::i32)));
9606
9607 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
9608 DAG.getConstant(1, dl, MVT::i32));
9609 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
9610
9611 return DAG.getMergeValues({RetVal, Chain}, dl);
9612}
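// [Editorial sketch - not part of SystemZISelLowering.cpp] Worked check of the
// bit-twiddle above: for the FPC rounding field x, ((x ^ (x >> 1)) ^ 1) yields
// 0->1 (nearest), 1->0 (toward zero), 2->2 (+inf), 3->3 (-inf), i.e. exactly
// the FLT_ROUNDS encoding listed in the comment:
static inline unsigned fpcToFltRoundsSketch(unsigned FPC) {
  unsigned X = FPC & 3;
  return (X ^ (X >> 1)) ^ 1;
}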
9613
9614SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
9615 SelectionDAG &DAG) const {
9616 EVT VT = Op.getValueType();
9617 Op = Op.getOperand(0);
9618 EVT OpVT = Op.getValueType();
9619
9620 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
9621
9622 SDLoc DL(Op);
9623
9624 // load a 0 vector for the third operand of VSUM.
9625 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
9626
9627 // execute VSUM.
9628 switch (OpVT.getScalarSizeInBits()) {
9629 case 8:
9630 case 16:
9631 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
9632 [[fallthrough]];
9633 case 32:
9634 case 64:
9635 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
9636 DAG.getBitcast(Op.getValueType(), Zero));
9637 break;
9638 case 128:
9639 break; // VSUM over v1i128 should not happen and would be a noop
9640 default:
9641 llvm_unreachable("Unexpected scalar size.");
9642 }
9643 // Cast to original vector type, retrieve last element.
9644 return DAG.getNode(
9645 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
9646 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
9647}
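// [Editorial sketch - not part of SystemZISelLowering.cpp] Scalar semantics of
// what the VSUM-based lowering above computes for byte elements; the vector
// code reaches the same total via partial sums and then extracts the last lane:
static inline unsigned vecreduceAddSketch(const unsigned char (&V)[16]) {
  unsigned Sum = 0;
  for (unsigned char E : V)
    Sum += E; // VECREDUCE_ADD sums all lanes; the result is then truncated
  return Sum; // to the requested scalar type by the caller.
}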
unsigned const MachineRegisterInfo * MRI
#define Success
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
amdgpu AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
Function Alias Analysis Results
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:301
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
IRTranslator LLVM IR MI
iv Induction Variable Users
Definition: IVUsers.cpp:48
#define RegName(no)
lazy value info
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
uint64_t High
LLVMContext & Context
#define P(N)
const char LLVMTargetMachineRef TM
static bool isSelectPseudo(MachineInstr &MI)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
#define OPCODE(NAME)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition: APInt.h:300
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
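The APInt entries above cover the arbitrary-precision bit manipulation this lowering code leans on. A minimal sketch of those calls, with placeholder values that are not taken from this file:

#include "llvm/ADT/APInt.h"
using namespace llvm;

static void apintSketch() {
  APInt Mask(32, 0x00FF0000);                     // 32-bit constant
  APInt Wide = Mask.zext(64);                     // zero-extend to 64 bits
  Wide.setBitsFrom(48);                           // set bits [48, 64)
  Wide.lshrInPlace(8);                            // logical right shift in place
  APInt Low = Wide.trunc(16);                     // keep the low 16 bits
  bool Contained = Low.zext(64).isSubsetOf(Wide); // all bits of Low also in Wide?
  (void)Contained;
}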
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
@ Add
*p = old + v
Definition: Instructions.h:764
@ Or
*p = old | v
Definition: Instructions.h:772
@ Sub
*p = old - v
Definition: Instructions.h:766
@ And
*p = old & v
Definition: Instructions.h:768
@ Xor
*p = old ^ v
Definition: Instructions.h:774
BinOp getOperation() const
Definition: Instructions.h:845
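The AtomicRMWInst operations above are what a backend inspects when deciding how to expand an atomicrmw. A hypothetical predicate using getOperation(), shown only for illustration:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// True for the bitwise atomicrmw operations listed above.
static bool isBitwiseRMW(const AtomicRMWInst *RMW) {
  switch (RMW->getOperation()) {
  case AtomicRMWInst::And:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    return true;
  default:
    return false;
  }
}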
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:349
The address of a basic block.
Definition: Constants.h:889
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
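A hedged sketch of the isConstantSplat query, wrapped in a hypothetical helper; SystemZ is big-endian, hence isBigEndian=true, and the MinSplatBits value here is arbitrary:

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool getSplatValue(const BuildVectorSDNode *BVN, APInt &SplatValue) {
  APInt SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  // Returns true if every defined element repeats the same bit pattern.
  return BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                              HasAnyUndefs, /*MinSplatBits=*/8,
                              /*isBigEndian=*/true);
}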
CCState - This class holds information needed while lowering arguments and return values.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:701
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:713
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:263
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:675
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:563
bool hasPrivateLinkage() const
Definition: GlobalValue.h:527
bool hasInternalLinkage() const
Definition: GlobalValue.h:526
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:184
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
unsigned getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setMaxCallFrameSize(unsigned S)
MachineFunctionProperties & reset(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
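The MachineInstrBuilder methods above form the fluent interface used with BuildMI (documented further below). A hypothetical helper sketch; the opcode and the base/displacement/index operand order are assumptions made for this example, not a quote from this file:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"
using namespace llvm;

// Emit "DestReg = <Opcode> Disp(BaseReg)" before InsertPt.
static void emitAddressMaterialization(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       const DebugLoc &DL,
                                       const TargetInstrInfo *TII,
                                       unsigned Opcode, Register DestReg,
                                       Register BaseReg, int64_t Disp) {
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode), DestReg)
      .addReg(BaseReg)   // base register
      .addImm(Disp)      // displacement
      .addReg(0);        // no index register
}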
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr kills the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr fully defines the specified register.
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUses uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:721
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:477
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:731
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:827
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:471
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:861
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:472
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:772
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:675
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:767
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:468
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:798
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:844
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:484
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:738
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:553
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
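The SelectionDAG builders listed above are the primary way target code constructs new nodes. A minimal, purely illustrative sketch; the types and the add-then-splat pattern are chosen for the example, not taken from this file:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Form Val + 1 in i64, then splat the sum into a v2i64 BUILD_VECTOR.
static SDValue addOneAndSplat(SelectionDAG &DAG, const SDLoc &DL, SDValue Val) {
  SDValue One = DAG.getConstant(1, DL, MVT::i64);
  SDValue Sum = DAG.getNode(ISD::ADD, DL, MVT::i64, Val, One);
  return DAG.getSplatBuildVector(MVT::v2i64, DL, Sum);
}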
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void reserve(size_type N)
Definition: SmallVector.h:676
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:466
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:257
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:680
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
iterator end() const
Definition: StringRef.h:113
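The StringRef helpers above (starts_with, slice, getAsInteger) are typically combined when parsing textual constraints. A hypothetical sketch assuming a "{<number>}" form, which is an assumption made only for this example:

#include "llvm/ADT/StringRef.h"
using namespace llvm;

// Parse "{5}" into 5; returns false if the string does not match the form.
static bool parseBracedIndex(StringRef Constraint, unsigned &Index) {
  if (!Constraint.starts_with("{") || !Constraint.ends_with("}"))
    return false;
  // slice() keeps [Start, End); getAsInteger() returns true on parse failure.
  return !Constraint.slice(1, Constraint.size() - 1).getAsInteger(10, Index);
}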
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
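StringSwitch gives a compact dispatch over string literals. A small illustrative lookup; the names and values are placeholders:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

static int regNameToIndex(StringRef Name) {
  return StringSwitch<int>(Name)
      .Case("r0", 0)
      .Case("r1", 1)
      .Case("r2", 2)
      .Default(-1);   // unknown name
}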
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
A SystemZ-specific class detailing special use registers particular for calling conventions.
A SystemZ-specific constant pool value.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const
const TargetFrameLowering * getFrameLowering() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
const SystemZRegisterInfo * getRegisterInfo() const override
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
bool useSoftFloat() const override
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal add immediate, that is, the target has add instruction...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific-use registers, particular to z/OS when in 64-bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
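The setters above are normally invoked from a target's TargetLowering constructor to register its legalization rules. A hedged sketch of what such calls look like inside a hypothetical constructor body; the specific opcode/type/action choices below are illustrative, not a statement about SystemZ:

// Inside a hypothetical MyTargetLowering constructor:
setBooleanContents(ZeroOrOneBooleanContent);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); // widen to a legal op
setTruncStoreAction(MVT::f64, MVT::f32, Expand);             // no truncating f64->f32 store
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Custom);  // custom-lower extending load
setTargetDAGCombine(ISD::ZERO_EXTEND);                       // request a combine hook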
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isFP128Ty() const
Return true if this is 'fp128'.
Definition: Type.h:163
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1132
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1128
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:723
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:476
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1275
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:714
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1161
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1277
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1247
@ STRICT_FCEIL
Definition: ISDOpcodes.h:426
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1278
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1037
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:483
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
Definition: ISDOpcodes.h:1260
@ STRICT_FMINIMUM
Definition: ISDOpcodes.h:436
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:790
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:688
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1234
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1239
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:820
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:477
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1273
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1274
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:411
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1406
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:450
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1227
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:994
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:930
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1083
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:327
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1276
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1062
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:507
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1243
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:627
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1157
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:323
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:430
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1370
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:880
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:651
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1271
@ STRICT_FMAXIMUM
Definition: ISDOpcodes.h:435
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:424
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:425
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1279
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:971
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1047
@ ConstantPool
Definition: ISDOpcodes.h:82
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:809
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:888
@ STRICT_FROUND
Definition: ISDOpcodes.h:428
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:736
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:303
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:449
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:427
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1269
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:443
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:465
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:442
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:990
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1270
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1188
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:470
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1214
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:400
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:524
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1268
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:831
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:423
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1076
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:763
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:493
@ AssertZext
Definition: ISDOpcodes.h:62
@ STRICT_FRINT
Definition: ISDOpcodes.h:422
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1326
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:515
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
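The two CondCode helpers above are what comparison canonicalization uses when it swaps or inverts operands. A small, self-contained illustration:

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

static void condCodeSketch() {
  ISD::CondCode CC = ISD::SETLT;                               // X < Y
  ISD::CondCode Swapped = ISD::getSetCCSwappedOperands(CC);    // Y > X (SETGT)
  ISD::CondCode Inverse = ISD::getSetCCInverse(CC, MVT::i64);  // X >= Y (SETGE)
  (void)Swapped;
  (void)Inverse;
}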
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1529
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1509
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition: SystemZ.h:40
static bool isImmHH(uint64_t Val)
Definition: SystemZ.h:176
const unsigned CCMASK_TEND
Definition: SystemZ.h:97
const unsigned CCMASK_CS_EQ
Definition: SystemZ.h:67
const unsigned CCMASK_TBEGIN
Definition: SystemZ.h:92
const unsigned CCMASK_0
Definition: SystemZ.h:27
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition: SystemZ.h:82
const unsigned CCMASK_LOGICAL_CARRY
Definition: SystemZ.h:60
const unsigned TDCMASK_NORMAL_MINUS
Definition: SystemZ.h:122
const unsigned CCMASK_TDC
Definition: SystemZ.h:109
const unsigned CCMASK_FCMP
Definition: SystemZ.h:48
const unsigned CCMASK_TM_SOME_0
Definition: SystemZ.h:81
static bool isImmHL(uint64_t Val)
Definition: SystemZ.h:171
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition: SystemZ.h:124
const unsigned PFD_READ
Definition: SystemZ.h:115
const unsigned CCMASK_1
Definition: SystemZ.h:28
const unsigned TDCMASK_NORMAL_PLUS
Definition: SystemZ.h:121
const unsigned PFD_WRITE
Definition: SystemZ.h:116
const unsigned CCMASK_CMP_GT
Definition: SystemZ.h:37
const unsigned TDCMASK_QNAN_MINUS
Definition: SystemZ.h:128
const unsigned CCMASK_CS
Definition: SystemZ.h:69
const unsigned CCMASK_ANY
Definition: SystemZ.h:31
const unsigned CCMASK_ARITH
Definition: SystemZ.h:55
const unsigned CCMASK_TM_MIXED_MSB_0
Definition: SystemZ.h:78
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition: SystemZ.h:123
static bool isImmLL(uint64_t Val)
Definition: SystemZ.h:161
const unsigned VectorBits
Definition: SystemZ.h:154
static bool isImmLH(uint64_t Val)
Definition: SystemZ.h:166
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition: SystemZ.h:125
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition: SystemZ.h:77
const unsigned IPM_CC
Definition: SystemZ.h:112
const unsigned CCMASK_CMP_LE
Definition: SystemZ.h:39
const unsigned CCMASK_CMP_O
Definition: SystemZ.h:44
const unsigned CCMASK_CMP_EQ
Definition: SystemZ.h:35
const unsigned VectorBytes
Definition: SystemZ.h:158
const unsigned TDCMASK_INFINITY_MINUS
Definition: SystemZ.h:126
const unsigned CCMASK_ICMP
Definition: SystemZ.h:47
const unsigned CCMASK_VCMP_ALL
Definition: SystemZ.h:101
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition: SystemZ.h:104
const unsigned CCMASK_TM_MIXED_MSB_1
Definition: SystemZ.h:79
const unsigned CCMASK_TM_MSB_0
Definition: SystemZ.h:83
const unsigned CCMASK_ARITH_OVERFLOW
Definition: SystemZ.h:54
const unsigned CCMASK_CS_NE
Definition: SystemZ.h:68
const unsigned TDCMASK_SNAN_PLUS
Definition: SystemZ.h:129
const unsigned CCMASK_TM
Definition: SystemZ.h:85
const unsigned CCMASK_3
Definition: SystemZ.h:30
const unsigned CCMASK_CMP_LT
Definition: SystemZ.h:36
const unsigned CCMASK_CMP_NE
Definition: SystemZ.h:38
const unsigned TDCMASK_ZERO_PLUS
Definition: SystemZ.h:119
const unsigned TDCMASK_QNAN_PLUS
Definition: SystemZ.h:127
const unsigned TDCMASK_ZERO_MINUS
Definition: SystemZ.h:120
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition: SystemZ.h:80
const unsigned CCMASK_LOGICAL_BORROW
Definition: SystemZ.h:62
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition: SystemZ.h:43
const unsigned CCMASK_LOGICAL
Definition: SystemZ.h:64
const unsigned CCMASK_TM_MSB_1
Definition: SystemZ.h:84
const unsigned TDCMASK_SNAN_MINUS
Definition: SystemZ.h:130
@ GeneralDynamic
Definition: CodeGen.h:46
@ GS
Definition: X86.h:205
Reg
All possible values of the reg field in the ModR/M byte.
support::ulittle32_t Word
Definition: IRSymtab.h:52
NodeAddr< CodeNode * > Code
Definition: RDFGraph.h:388
constexpr const char32_t SBase
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:337
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
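A hedged sketch of the usual BuildMI pattern; emitCopyLike, Opcode, DestReg and SrcReg are hypothetical names, and the block, insertion point and TargetInstrInfo are assumed to come from the caller.

// Sketch only: insert "DestReg = Opcode SrcReg" before InsertPt.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

void emitCopyLike(llvm::MachineBasicBlock &MBB,
                  llvm::MachineBasicBlock::iterator InsertPt,
                  const llvm::DebugLoc &DL, const llvm::TargetInstrInfo &TII,
                  unsigned Opcode, llvm::Register DestReg,
                  llvm::Register SrcReg) {
  llvm::BuildMI(MBB, InsertPt, DL, TII.get(Opcode), DestReg).addReg(SrcReg);
}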
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
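A small sketch of make_range for iterating a sub-range; sumInterior and its vector argument are illustrative names, not code from this file.

// Sketch only: walk everything except the first and last element.
#include "llvm/ADT/iterator_range.h"
#include <vector>

int sumInterior(const std::vector<int> &V) {
  int Sum = 0;
  if (V.size() > 2)
    for (int X : llvm::make_range(V.begin() + 1, V.end() - 1))
      Sum += X;
  return Sum;
}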
void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert 'Bytes' to a hex string and output to 'OS'.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
int countr_zero(T Val)
Count the number of zero bits starting from the least significant bit, stopping at the first set bit.
Definition: bit.h:215
int countl_zero(T Val)
Count the number of zero bits starting from the most significant bit, stopping at the first set bit.
Definition: bit.h:281
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
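A compact sketch of the bit utilities listed above (bit_ceil, countr_zero, countl_zero, isPowerOf2_32); it assumes llvm/ADT/bit.h and llvm/Support/MathExtras.h.

// Sketch only: expected results of the counting and rounding helpers.
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

int main() {
  assert(llvm::bit_ceil(5u) == 8u);             // smallest power of two >= 5
  assert(llvm::countr_zero(8u) == 3);           // 0b1000 has three trailing zeros
  assert(llvm::countl_zero(uint32_t(8)) == 28); // 32-bit value, 28 leading zeros
  assert(llvm::isPowerOf2_32(64));              // 64 is a power of two
  assert(!llvm::isPowerOf2_32(0));              // zero is excluded
  return 0;
}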
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ Mul
Product of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:465
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
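A short sketch for SignExtend64 and bit_floor; it assumes llvm/Support/MathExtras.h and llvm/ADT/bit.h.

// Sketch only: sign-extension from a narrow field and power-of-two flooring.
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  assert(llvm::SignExtend64<16>(0xFFFF) == -1);    // bottom 16 bits all ones
  assert(llvm::SignExtend64<16>(0x7FFF) == 32767); // positive in 16 bits
  assert(llvm::bit_floor(10u) == 8u);              // largest power of two <= 10
  return 0;
}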
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
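The EVT queries above are typically combined when a lowering hook decides whether a type needs special handling; a hedged sketch with a hypothetical predicate name.

// Sketch only: true for vector types whose elements are integers of >= 64 bits.
#include "llvm/CodeGen/ValueTypes.h"

bool isWideIntegerVector(llvm::EVT VT) {
  return VT.isVector() && VT.getVectorElementType().isInteger() &&
         VT.getScalarSizeInBits() >= 64;
}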
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:182
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:141
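A small sketch of how the KnownBits helpers above compose, for example merging the facts known about the two inputs of a select; the function name is illustrative.

// Sketch only: only bits known identically on both paths survive the merge.
#include "llvm/Support/KnownBits.h"

llvm::KnownBits knownBitsOfSelect(const llvm::KnownBits &TrueSide,
                                  const llvm::KnownBits &FalseSide) {
  return TrueSide.intersectWith(FalseSide);
}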
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
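A hedged sketch of attaching one of these MachinePointerInfo records to a DAG load; the helper name and parameters are assumptions, not code from this file.

// Sketch only: load VT from a fixed stack slot, tagging the memory operand
// with precise frame-index information.
#include "llvm/CodeGen/MachinePointerInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

llvm::SDValue loadFromFixedSlot(llvm::SelectionDAG &DAG, const llvm::SDLoc &DL,
                                llvm::SDValue Chain, llvm::SDValue Addr,
                                llvm::EVT VT, int FI) {
  return DAG.getLoad(VT, DL, Chain, Addr,
                     llvm::MachinePointerInfo::getFixedStack(
                         DAG.getMachineFunction(), FI));
}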
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
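The CallLoweringInfo setters above are normally chained when a target turns an operation into a runtime-library call; a hedged sketch of that pattern, with the helper name and arguments assumed rather than taken from this file.

// Sketch only: build and issue a call through TargetLowering::LowerCallTo.
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"

std::pair<llvm::SDValue, llvm::SDValue>
emitRuntimeCall(llvm::SelectionDAG &DAG, const llvm::TargetLowering &TLI,
                const llvm::SDLoc &DL, llvm::SDValue Chain,
                llvm::SDValue Callee, llvm::Type *RetTy,
                llvm::TargetLowering::ArgListTy &&Args) {
  llvm::TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setCallee(llvm::CallingConv::C, RetTy, Callee, std::move(Args));
  // Returns {call result, output chain}.
  return TLI.LowerCallTo(CLI);
}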