1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
24#include "llvm/IR/Intrinsics.h"
25#include "llvm/IR/IntrinsicsS390.h"
29#include <cctype>
30#include <optional>
31
32using namespace llvm;
33
34#define DEBUG_TYPE "systemz-lower"
35
36namespace {
37// Represents information about a comparison.
38struct Comparison {
39 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
40 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
41 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
42
43 // The operands to the comparison.
44 SDValue Op0, Op1;
45
46 // Chain if this is a strict floating-point comparison.
47 SDValue Chain;
48
49 // The opcode that should be used to compare Op0 and Op1.
50 unsigned Opcode;
51
52 // A SystemZICMP value. Only used for integer comparisons.
53 unsigned ICmpType;
54
55 // The mask of CC values that Opcode can produce.
56 unsigned CCValid;
57
58 // The mask of CC values for which the original condition is true.
59 unsigned CCMask;
60};
61} // end anonymous namespace
62
63// Classify VT as either 32 or 64 bit.
64static bool is32Bit(EVT VT) {
65 switch (VT.getSimpleVT().SimpleTy) {
66 case MVT::i32:
67 return true;
68 case MVT::i64:
69 return false;
70 default:
71 llvm_unreachable("Unsupported type");
72 }
73}
74
75// Return a version of MachineOperand that can be safely used before the
76// final use.
78 if (Op.isReg())
79 Op.setIsKill(false);
80 return Op;
81}
82
84 const SystemZSubtarget &STI)
85 : TargetLowering(TM), Subtarget(STI) {
86 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
87
88 auto *Regs = STI.getSpecialRegisters();
89
90 // Set up the register classes.
91 if (Subtarget.hasHighWord())
92 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
93 else
94 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
95 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
96 if (!useSoftFloat()) {
97 if (Subtarget.hasVector()) {
98 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
99 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
100 } else {
101 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
102 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
103 }
104 if (Subtarget.hasVectorEnhancements1())
105 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
106 else
107 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
108
109 if (Subtarget.hasVector()) {
110 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
111 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
112 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
113 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
114 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
115 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
116 }
117
118 if (Subtarget.hasVector())
119 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
120 }
121
122 // Compute derived properties from the register classes
124
125 // Set up special registers.
126 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
127
128 // TODO: It may be better to default to latency-oriented scheduling; however,
129 // LLVM's current latency-oriented scheduler can't handle physreg definitions
130 // such as SystemZ has with CC, so set this to the register-pressure
131 // scheduler, which can.
133
136
138
139 // Instructions are strings of 2-byte aligned 2-byte values.
141 // For performance reasons we prefer 16-byte alignment.
143
144 // Handle operations that are handled in a similar way for all types.
145 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
146 I <= MVT::LAST_FP_VALUETYPE;
147 ++I) {
149 if (isTypeLegal(VT)) {
150 // Lower SET_CC into an IPM-based sequence.
154
155 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
157
158 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
161 }
162 }
163
164 // Expand jump table branches as address arithmetic followed by an
165 // indirect jump.
167
168 // Expand BRCOND into a BR_CC (see above).
170
171 // Handle integer types except i128.
172 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
173 I <= MVT::LAST_INTEGER_VALUETYPE;
174 ++I) {
176 if (isTypeLegal(VT) && VT != MVT::i128) {
178
179 // Expand individual DIV and REMs into DIVREMs.
186
187 // Support addition/subtraction with overflow.
190
191 // Support addition/subtraction with carry.
194
195 // Support carry in as value rather than glue.
198
199 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
200 // available, or if the operand is constant.
202
203 // Use POPCNT on z196 and above.
204 if (Subtarget.hasPopulationCount())
206 else
208
209 // No special instructions for these.
212
213 // Use *MUL_LOHI where possible instead of MULH*.
218
219 // Only z196 and above have native support for conversions to unsigned.
220 // On z10, promoting to i64 doesn't generate an inexact condition for
221 // values that are outside the i32 range but in the i64 range, so use
222 // the default expansion.
223 if (!Subtarget.hasFPExtension())
225
226 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
227 // default to Expand, so need to be modified to Legal where appropriate.
229 if (Subtarget.hasFPExtension())
231
232 // And similarly for STRICT_[SU]INT_TO_FP.
234 if (Subtarget.hasFPExtension())
236 }
237 }
238
239 // Handle i128 if legal.
240 if (isTypeLegal(MVT::i128)) {
241 // No special instructions for these.
257
258 // Support addition/subtraction with carry.
263
264 // Use VPOPCT and add up partial results.
266
267 // We have to use libcalls for these.
276 }
277
278 // Type legalization will convert 8- and 16-bit atomic operations into
279 // forms that operate on i32s (but still keeping the original memory VT).
280 // Lower them into full i32 operations.
292
293 // Whether or not i128 is a legal type, we need to custom lower
294 // the atomic operations in order to exploit SystemZ instructions.
299
300 // Mark sign/zero extending atomic loads as legal, which will make
301 // DAGCombiner fold extensions into atomic loads if possible.
303 {MVT::i8, MVT::i16, MVT::i32}, Legal);
305 {MVT::i8, MVT::i16}, Legal);
307 MVT::i8, Legal);
308
309 // We can use the CC result of compare-and-swap to implement
310 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
314
316
317 // Traps are legal, as we will convert them to "j .+2".
318 setOperationAction(ISD::TRAP, MVT::Other, Legal);
319
320 // z10 has instructions for signed but not unsigned FP conversion.
321 // Handle unsigned 32-bit types as signed 64-bit types.
322 if (!Subtarget.hasFPExtension()) {
327 }
328
329 // We have native support for a 64-bit CTLZ, via FLOGR.
333
334 // On z15 we have native support for a 64-bit CTPOP.
335 if (Subtarget.hasMiscellaneousExtensions3()) {
338 }
339
340 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
342
343 // Expand 128-bit shifts without using a libcall.
347 setLibcallName(RTLIB::SRL_I128, nullptr);
348 setLibcallName(RTLIB::SHL_I128, nullptr);
349 setLibcallName(RTLIB::SRA_I128, nullptr);
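  // Clearing these libcall names forces the i128 shifts to be expanded inline
  // instead of being lowered to runtime helpers (normally the compiler-rt
  // __ashlti3 / __lshrti3 / __ashrti3 routines).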
350
351 // Also expand 256-bit shifts if i128 is a legal type.
352 if (isTypeLegal(MVT::i128)) {
356 }
357
358 // Handle bitcast from fp128 to i128.
359 if (!isTypeLegal(MVT::i128))
361
362 // We have native instructions for i8, i16 and i32 extensions, but not i1.
364 for (MVT VT : MVT::integer_valuetypes()) {
368 }
369
370 // Handle the various types of symbolic address.
376
377 // We need to handle dynamic allocations specially because of the
378 // 160-byte area at the bottom of the stack.
381
384
385 // Handle prefetches with PFD or PFDRL.
387
388 // Handle readcyclecounter with STCKF.
390
392 // Assume by default that all vector operations need to be expanded.
393 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
394 if (getOperationAction(Opcode, VT) == Legal)
395 setOperationAction(Opcode, VT, Expand);
396
397 // Likewise all truncating stores and extending loads.
398 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
399 setTruncStoreAction(VT, InnerVT, Expand);
402 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
403 }
404
405 if (isTypeLegal(VT)) {
406 // These operations are legal for anything that can be stored in a
407 // vector register, even if there is no native support for the format
408 // as such. In particular, we can do these for v4f32 even though there
409 // are no specific instructions for that format.
415
416 // Likewise, except that we need to replace the nodes with something
417 // more specific.
420 }
421 }
422
423 // Handle integer vector types.
425 if (isTypeLegal(VT)) {
426 // These operations have direct equivalents.
431 if (VT != MVT::v2i64)
437 if (Subtarget.hasVectorEnhancements1())
439 else
443
444 // Convert a GPR scalar to a vector by inserting it into element 0.
446
447 // Use a series of unpacks for extensions.
450
451 // Detect shifts/rotates by a scalar amount and convert them into
452 // V*_BY_SCALAR.
457
458 // Add ISD::VECREDUCE_ADD as custom in order to implement
459 // it with VZERO+VSUM
461
462 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
463 // and inverting the result as necessary.
465 }
466 }
467
468 if (Subtarget.hasVector()) {
469 // There should be no need to check for float types other than v2f64
470 // since <2 x f32> isn't a legal type.
479
488 }
489
490 if (Subtarget.hasVectorEnhancements2()) {
499
508 }
509
510 // Handle floating-point types.
511 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
512 I <= MVT::LAST_FP_VALUETYPE;
513 ++I) {
515 if (isTypeLegal(VT)) {
516 // We can use FI for FRINT.
518
519 // We can use the extended form of FI for other rounding operations.
520 if (Subtarget.hasFPExtension()) {
526 }
527
528 // No special instructions for these.
534
535 // Special treatment.
537
538 // Handle constrained floating-point operations.
548 if (Subtarget.hasFPExtension()) {
554 }
555 }
556 }
557
558 // Handle floating-point vector types.
559 if (Subtarget.hasVector()) {
560 // Scalar-to-vector conversion is just a subreg.
563
564 // Some insertions and extractions can be done directly but others
565 // need to go via integers.
570
571 // These operations have direct equivalents.
572 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
573 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
574 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
575 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
576 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
577 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
578 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
579 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
580 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
583 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
586
587 // Handle constrained floating-point operations.
600
605 if (Subtarget.hasVectorEnhancements1()) {
608 }
609 }
610
611 // The vector enhancements facility 1 has instructions for these.
612 if (Subtarget.hasVectorEnhancements1()) {
613 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
614 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
615 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
616 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
617 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
618 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
619 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
620 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
621 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
624 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
627
632
637
642
647
652
653 // Handle constrained floating-point operations.
666 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
667 MVT::v4f32, MVT::v2f64 }) {
672 }
673 }
674
675 // We only have fused f128 multiply-addition on vector registers.
676 if (!Subtarget.hasVectorEnhancements1()) {
679 }
680
681 // We don't have a copysign instruction on vector registers.
682 if (Subtarget.hasVectorEnhancements1())
684
685 // Needed so that we don't try to implement f128 constant loads using
686 // a load-and-extend of an f80 constant (in cases where the constant
687 // would fit in an f80).
688 for (MVT VT : MVT::fp_valuetypes())
689 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
690
691 // We don't have extending load instructions on vector registers.
692 if (Subtarget.hasVectorEnhancements1()) {
693 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
694 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
695 }
696
697 // Floating-point truncation and stores need to be done separately.
698 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
699 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
700 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
701
702 // We have 64-bit FPR<->GPR moves, but need special handling for
703 // 32-bit forms.
704 if (!Subtarget.hasVector()) {
707 }
708
709 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
710 // structure, but VAEND is a no-op.
714
716
717 // Codes for which we want to perform some z-specific combinations.
721 ISD::LOAD,
732 ISD::SDIV,
733 ISD::UDIV,
734 ISD::SREM,
735 ISD::UREM,
738
739 // Handle intrinsics.
742
743 // We want to use MVC in preference to even a single load/store pair.
744 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
746
747 // The main memset sequence is a byte store followed by an MVC.
748 // Two STC or MV..I stores win over that, but the kind of fused stores
749 // generated by target-independent code don't when the byte value is
750 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
751 // than "STC;MVC". Handle the choice in target-specific code instead.
752 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
754
755 // Default to having -disable-strictnode-mutation on
756 IsStrictFPEnabled = true;
757
758 if (Subtarget.isTargetzOS()) {
759 struct RTLibCallMapping {
760 RTLIB::Libcall Code;
761 const char *Name;
762 };
763 static RTLibCallMapping RTLibCallCommon[] = {
764#define HANDLE_LIBCALL(code, name) {RTLIB::code, name},
765#include "ZOSLibcallNames.def"
766 };
767 for (auto &E : RTLibCallCommon)
768 setLibcallName(E.Code, E.Name);
769 }
770}
771
773 return Subtarget.hasSoftFloat();
774}
775
777 LLVMContext &, EVT VT) const {
778 if (!VT.isVector())
779 return MVT::i32;
781}
782
784 const MachineFunction &MF, EVT VT) const {
785 VT = VT.getScalarType();
786
787 if (!VT.isSimple())
788 return false;
789
790 switch (VT.getSimpleVT().SimpleTy) {
791 case MVT::f32:
792 case MVT::f64:
793 return true;
794 case MVT::f128:
795 return Subtarget.hasVectorEnhancements1();
796 default:
797 break;
798 }
799
800 return false;
801}
802
803// Return true if the constant can be generated with a vector instruction,
804// such as VGM, VGMB or VREPI.
806 const SystemZSubtarget &Subtarget) {
807 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
808 if (!Subtarget.hasVector() ||
809 (isFP128 && !Subtarget.hasVectorEnhancements1()))
810 return false;
811
812 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
813 // preferred way of creating all-zero and all-one vectors so give it
814 // priority over other methods below.
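 // For example, an IntBits value whose two lowest bytes are 0xff and whose
 // remaining bytes are zero survives the loop below with Mask == 0x3: one
 // mask bit per all-ones byte, counted from the least-significant byte.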
815 unsigned Mask = 0;
816 unsigned I = 0;
817 for (; I < SystemZ::VectorBytes; ++I) {
818 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
819 if (Byte == 0xff)
820 Mask |= 1ULL << I;
821 else if (Byte != 0)
822 break;
823 }
824 if (I == SystemZ::VectorBytes) {
826 OpVals.push_back(Mask);
828 return true;
829 }
830
831 if (SplatBitSize > 64)
832 return false;
833
834 auto tryValue = [&](uint64_t Value) -> bool {
835 // Try VECTOR REPLICATE IMMEDIATE
836 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
837 if (isInt<16>(SignedValue)) {
838 OpVals.push_back(((unsigned) SignedValue));
841 SystemZ::VectorBits / SplatBitSize);
842 return true;
843 }
844 // Try VECTOR GENERATE MASK
845 unsigned Start, End;
846 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
847 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
848 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
849 // a SplatBitSize-bit value, so that 0 denotes 1 << (SplatBitSize-1).
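      // E.g. with SplatBitSize == 32 and Value == 0x0000fff0, isRxSBGMask
      // reports Start == 48 and End == 59 in 64-bit numbering; subtracting
      // 64 - 32 yields the 32-bit-relative range 16..27.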
850 OpVals.push_back(Start - (64 - SplatBitSize));
851 OpVals.push_back(End - (64 - SplatBitSize));
854 SystemZ::VectorBits / SplatBitSize);
855 return true;
856 }
857 return false;
858 };
859
860 // First try assuming that any undefined bits above the highest set bit
861 // and below the lowest set bit are 1s. This increases the likelihood of
862 // being able to use a sign-extended element value in VECTOR REPLICATE
863 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
864 uint64_t SplatBitsZ = SplatBits.getZExtValue();
865 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
866 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
867 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
868 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
869 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
870 if (tryValue(SplatBitsZ | Upper | Lower))
871 return true;
872
873 // Now try assuming that any undefined bits between the first and
874 // last defined set bits are set. This increases the chances of
875 // using a non-wraparound mask.
876 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
877 return tryValue(SplatBitsZ | Middle);
878}
879
881 if (IntImm.isSingleWord()) {
882 IntBits = APInt(128, IntImm.getZExtValue());
883 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
884 } else
885 IntBits = IntImm;
886 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
887
888 // Find the smallest splat.
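 // For example, 0x0001 repeated eight times across 128 bits halves down to a
 // 16-bit splat: SplatBits == 0x0001 and SplatBitSize == 16 (halving stops
 // once the two halves differ or would drop below 8 bits).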
889 SplatBits = IntImm;
890 unsigned Width = SplatBits.getBitWidth();
891 while (Width > 8) {
892 unsigned HalfSize = Width / 2;
893 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
894 APInt LowValue = SplatBits.trunc(HalfSize);
895
896 // If the two halves do not match, stop here.
897 if (HighValue != LowValue || 8 > HalfSize)
898 break;
899
900 SplatBits = HighValue;
901 Width = HalfSize;
902 }
903 SplatUndef = 0;
904 SplatBitSize = Width;
905}
906
908 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
909 bool HasAnyUndefs;
910
911 // Get IntBits by finding the 128 bit splat.
912 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
913 true);
914
915 // Get SplatBits by finding the 8 bit or greater splat.
916 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
917 true);
918}
919
921 bool ForCodeSize) const {
922 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
923 if (Imm.isZero() || Imm.isNegZero())
924 return true;
925
927}
928
929/// Returns true if stack probing through inline assembly is requested.
931 // If the function specifically requests inline stack probes, emit them.
932 if (MF.getFunction().hasFnAttribute("probe-stack"))
933 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
934 "inline-asm";
935 return false;
936}
937
941}
942
946}
947
950 // Don't expand subword operations as they require special treatment.
951 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
953
954 // Don't expand if there is a target instruction available.
955 if (Subtarget.hasInterlockedAccess1() &&
956 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
963
965}
966
968 // We can use CGFI or CLGFI.
969 return isInt<32>(Imm) || isUInt<32>(Imm);
970}
971
973 // We can use ALGFI or SLGFI.
974 return isUInt<32>(Imm) || isUInt<32>(-Imm);
975}
976
978 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
979 // Unaligned accesses should never be slower than the expanded version.
980 // We check specifically for aligned accesses in the few cases where
981 // they are required.
982 if (Fast)
983 *Fast = 1;
984 return true;
985}
986
987// Information about the addressing mode for a memory access.
989 // True if a long displacement is supported.
991
992 // True if use of index register is supported.
994
995 AddressingMode(bool LongDispl, bool IdxReg) :
996 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
997};
998
999// Return the desired addressing mode for a Load which has only one use (in
1000// the same block) which is a Store.
1002 Type *Ty) {
1003 // With vector support, a Load->Store combination may be combined into either
1004 // an MVC or vector operations, and it seems to work best to allow the
1005 // vector addressing mode.
1006 if (HasVector)
1007 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1008
1009 // Otherwise only the MVC case is special.
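  // (MVC is an SS-format storage-to-storage instruction: it takes a base
  // register plus a 12-bit displacement and no index register, so neither
  // long displacements nor indexing help for the i8 case.)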
1010 bool MVC = Ty->isIntegerTy(8);
1011 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1012}
1013
1014// Return the addressing mode which seems most desirable given an LLVM
1015// Instruction pointer.
1016static AddressingMode
1018 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1019 switch (II->getIntrinsicID()) {
1020 default: break;
1021 case Intrinsic::memset:
1022 case Intrinsic::memmove:
1023 case Intrinsic::memcpy:
1024 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1025 }
1026 }
1027
1028 if (isa<LoadInst>(I) && I->hasOneUse()) {
1029 auto *SingleUser = cast<Instruction>(*I->user_begin());
1030 if (SingleUser->getParent() == I->getParent()) {
1031 if (isa<ICmpInst>(SingleUser)) {
1032 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1033 if (C->getBitWidth() <= 64 &&
1034 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1035 // Comparison of memory with a 16-bit signed / unsigned immediate
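          // (presumably lowered to a memory-and-immediate compare, which only
          // takes a base register and a short displacement)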
1036 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1037 } else if (isa<StoreInst>(SingleUser))
1038 // Load->Store
1039 return getLoadStoreAddrMode(HasVector, I->getType());
1040 }
1041 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1042 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1043 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1044 // Load->Store
1045 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1046 }
1047
1048 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1049
1050 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1051 // dependencies (LDE only supports small offsets).
1052 // * Utilize the vector registers to hold floating point
1053 // values (vector load / store instructions only support small
1054 // offsets).
1055
1056 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1057 I->getOperand(0)->getType());
1058 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1059 bool IsVectorAccess = MemAccessTy->isVectorTy();
1060
1061 // A store of an extracted vector element will be combined into a VSTE type
1062 // instruction.
1063 if (!IsVectorAccess && isa<StoreInst>(I)) {
1064 Value *DataOp = I->getOperand(0);
1065 if (isa<ExtractElementInst>(DataOp))
1066 IsVectorAccess = true;
1067 }
1068
1069 // A load which gets inserted into a vector element will be combined into a
1070 // VLE type instruction.
1071 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1072 User *LoadUser = *I->user_begin();
1073 if (isa<InsertElementInst>(LoadUser))
1074 IsVectorAccess = true;
1075 }
1076
1077 if (IsFPAccess || IsVectorAccess)
1078 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1079 }
1080
1081 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1082}
1083
1085 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1086 // Punt on globals for now, although they can be used in limited
1087 // RELATIVE LONG cases.
1088 if (AM.BaseGV)
1089 return false;
1090
1091 // Require a 20-bit signed offset.
1092 if (!isInt<20>(AM.BaseOffs))
1093 return false;
1094
1095 bool RequireD12 =
1096 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1097 AddressingMode SupportedAM(!RequireD12, true);
1098 if (I != nullptr)
1099 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1100
1101 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1102 return false;
1103
1104 if (!SupportedAM.IndexReg)
1105 // No indexing allowed.
1106 return AM.Scale == 0;
1107 else
1108 // Indexing is OK but no scale factor can be applied.
1109 return AM.Scale == 0 || AM.Scale == 1;
1110}
1111
1113 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
1114 unsigned SrcAS, const AttributeList &FuncAttributes) const {
1115 const int MVCFastLen = 16;
1116
1117 if (Limit != ~unsigned(0)) {
1118 // Don't expand Op into scalar loads/stores in these cases:
1119 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1120 return false; // Small memcpy: Use MVC
1121 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1122 return false; // Small memset (first byte with STC/MVI): Use MVC
1123 if (Op.isZeroMemset())
1124 return false; // Memset zero: Use XC
1125 }
1126
1127 return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
1128 SrcAS, FuncAttributes);
1129}
1130
1132 const AttributeList &FuncAttributes) const {
1133 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1134}
1135
1136bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1137 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1138 return false;
1139 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1140 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1141 return FromBits > ToBits;
1142}
1143
1145 if (!FromVT.isInteger() || !ToVT.isInteger())
1146 return false;
1147 unsigned FromBits = FromVT.getFixedSizeInBits();
1148 unsigned ToBits = ToVT.getFixedSizeInBits();
1149 return FromBits > ToBits;
1150}
1151
1152//===----------------------------------------------------------------------===//
1153// Inline asm support
1154//===----------------------------------------------------------------------===//
1155
1158 if (Constraint.size() == 1) {
1159 switch (Constraint[0]) {
1160 case 'a': // Address register
1161 case 'd': // Data register (equivalent to 'r')
1162 case 'f': // Floating-point register
1163 case 'h': // High-part register
1164 case 'r': // General-purpose register
1165 case 'v': // Vector register
1166 return C_RegisterClass;
1167
1168 case 'Q': // Memory with base and unsigned 12-bit displacement
1169 case 'R': // Likewise, plus an index
1170 case 'S': // Memory with base and signed 20-bit displacement
1171 case 'T': // Likewise, plus an index
1172 case 'm': // Equivalent to 'T'.
1173 return C_Memory;
1174
1175 case 'I': // Unsigned 8-bit constant
1176 case 'J': // Unsigned 12-bit constant
1177 case 'K': // Signed 16-bit constant
1178 case 'L': // Signed 20-bit displacement (on all targets we support)
1179 case 'M': // 0x7fffffff
1180 return C_Immediate;
1181
1182 default:
1183 break;
1184 }
1185 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1186 switch (Constraint[1]) {
1187 case 'Q': // Address with base and unsigned 12-bit displacement
1188 case 'R': // Likewise, plus an index
1189 case 'S': // Address with base and signed 20-bit displacement
1190 case 'T': // Likewise, plus an index
1191 return C_Address;
1192
1193 default:
1194 break;
1195 }
1196 }
1197 return TargetLowering::getConstraintType(Constraint);
1198}
1199
1202 const char *constraint) const {
1204 Value *CallOperandVal = info.CallOperandVal;
1205 // If we don't have a value, we can't do a match,
1206 // but allow it at the lowest weight.
1207 if (!CallOperandVal)
1208 return CW_Default;
1209 Type *type = CallOperandVal->getType();
1210 // Look at the constraint type.
1211 switch (*constraint) {
1212 default:
1214 break;
1215
1216 case 'a': // Address register
1217 case 'd': // Data register (equivalent to 'r')
1218 case 'h': // High-part register
1219 case 'r': // General-purpose register
1220 weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1221 break;
1222
1223 case 'f': // Floating-point register
1224 if (!useSoftFloat())
1225 weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1226 break;
1227
1228 case 'v': // Vector register
1229 if (Subtarget.hasVector())
1230 weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1231 : CW_Default;
1232 break;
1233
1234 case 'I': // Unsigned 8-bit constant
1235 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1236 if (isUInt<8>(C->getZExtValue()))
1237 weight = CW_Constant;
1238 break;
1239
1240 case 'J': // Unsigned 12-bit constant
1241 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1242 if (isUInt<12>(C->getZExtValue()))
1243 weight = CW_Constant;
1244 break;
1245
1246 case 'K': // Signed 16-bit constant
1247 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1248 if (isInt<16>(C->getSExtValue()))
1249 weight = CW_Constant;
1250 break;
1251
1252 case 'L': // Signed 20-bit displacement (on all targets we support)
1253 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1254 if (isInt<20>(C->getSExtValue()))
1255 weight = CW_Constant;
1256 break;
1257
1258 case 'M': // 0x7fffffff
1259 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1260 if (C->getZExtValue() == 0x7fffffff)
1261 weight = CW_Constant;
1262 break;
1263 }
1264 return weight;
1265}
1266
1267// Parse a "{tNNN}" register constraint for which the register type "t"
1268// has already been verified. RC is the class associated with "t" and
1269// Map maps 0-based register numbers to LLVM register numbers.
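// For example, the constraint "{r5}" parses to Index == 5, and Map[5] then
// supplies the LLVM register number for %r5 in whichever class RC describes.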
1270static std::pair<unsigned, const TargetRegisterClass *>
1272 const unsigned *Map, unsigned Size) {
1273 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1274 if (isdigit(Constraint[2])) {
1275 unsigned Index;
1276 bool Failed =
1277 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1278 if (!Failed && Index < Size && Map[Index])
1279 return std::make_pair(Map[Index], RC);
1280 }
1281 return std::make_pair(0U, nullptr);
1282}
1283
1284std::pair<unsigned, const TargetRegisterClass *>
1286 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1287 if (Constraint.size() == 1) {
1288 // GCC Constraint Letters
1289 switch (Constraint[0]) {
1290 default: break;
1291 case 'd': // Data register (equivalent to 'r')
1292 case 'r': // General-purpose register
1293 if (VT.getSizeInBits() == 64)
1294 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1295 else if (VT.getSizeInBits() == 128)
1296 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1297 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1298
1299 case 'a': // Address register
1300 if (VT == MVT::i64)
1301 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1302 else if (VT == MVT::i128)
1303 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1304 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1305
1306 case 'h': // High-part register (an LLVM extension)
1307 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1308
1309 case 'f': // Floating-point register
1310 if (!useSoftFloat()) {
1311 if (VT.getSizeInBits() == 64)
1312 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1313 else if (VT.getSizeInBits() == 128)
1314 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1315 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1316 }
1317 break;
1318
1319 case 'v': // Vector register
1320 if (Subtarget.hasVector()) {
1321 if (VT.getSizeInBits() == 32)
1322 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1323 if (VT.getSizeInBits() == 64)
1324 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1325 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1326 }
1327 break;
1328 }
1329 }
1330 if (Constraint.starts_with("{")) {
1331
1332 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1333 // to check the size on.
1334 auto getVTSizeInBits = [&VT]() {
1335 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1336 };
1337
1338 // We need to override the default register parsing for GPRs and FPRs
1339 // because the interpretation depends on VT. The internal names of
1340 // the registers are also different from the external names
1341 // (F0D and F0S instead of F0, etc.).
1342 if (Constraint[1] == 'r') {
1343 if (getVTSizeInBits() == 32)
1344 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1346 if (getVTSizeInBits() == 128)
1347 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1349 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1351 }
1352 if (Constraint[1] == 'f') {
1353 if (useSoftFloat())
1354 return std::make_pair(
1355 0u, static_cast<const TargetRegisterClass *>(nullptr));
1356 if (getVTSizeInBits() == 32)
1357 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1359 if (getVTSizeInBits() == 128)
1360 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1362 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1364 }
1365 if (Constraint[1] == 'v') {
1366 if (!Subtarget.hasVector())
1367 return std::make_pair(
1368 0u, static_cast<const TargetRegisterClass *>(nullptr));
1369 if (getVTSizeInBits() == 32)
1370 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1372 if (getVTSizeInBits() == 64)
1373 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1375 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1377 }
1378 }
1379 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1380}
1381
1382// FIXME? Maybe this could be a TableGen attribute on some registers and
1383// this table could be generated automatically from RegInfo.
1386 const MachineFunction &MF) const {
1387 Register Reg =
1389 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0)
1390 .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0)
1391 .Default(0);
1392
1393 if (Reg)
1394 return Reg;
1395 report_fatal_error("Invalid register name global variable");
1396}
1397
1399 const Constant *PersonalityFn) const {
1400 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1401}
1402
1404 const Constant *PersonalityFn) const {
1405 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1406}
1407
1409 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1410 SelectionDAG &DAG) const {
1411 // Only support length 1 constraints for now.
1412 if (Constraint.size() == 1) {
1413 switch (Constraint[0]) {
1414 case 'I': // Unsigned 8-bit constant
1415 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1416 if (isUInt<8>(C->getZExtValue()))
1417 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1418 Op.getValueType()));
1419 return;
1420
1421 case 'J': // Unsigned 12-bit constant
1422 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1423 if (isUInt<12>(C->getZExtValue()))
1424 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1425 Op.getValueType()));
1426 return;
1427
1428 case 'K': // Signed 16-bit constant
1429 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1430 if (isInt<16>(C->getSExtValue()))
1431 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1432 Op.getValueType()));
1433 return;
1434
1435 case 'L': // Signed 20-bit displacement (on all targets we support)
1436 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1437 if (isInt<20>(C->getSExtValue()))
1438 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1439 Op.getValueType()));
1440 return;
1441
1442 case 'M': // 0x7fffffff
1443 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1444 if (C->getZExtValue() == 0x7fffffff)
1445 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1446 Op.getValueType()));
1447 return;
1448 }
1449 }
1450 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1451}
1452
1453//===----------------------------------------------------------------------===//
1454// Calling conventions
1455//===----------------------------------------------------------------------===//
1456
1457#include "SystemZGenCallingConv.inc"
1458
1460 CallingConv::ID) const {
1461 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1462 SystemZ::R14D, 0 };
1463 return ScratchRegs;
1464}
1465
1467 Type *ToType) const {
1468 return isTruncateFree(FromType, ToType);
1469}
1470
1472 return CI->isTailCall();
1473}
1474
1475// Value is a value that has been passed to us in the location described by VA
1476// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1477// any loads onto Chain.
1479 CCValAssign &VA, SDValue Chain,
1480 SDValue Value) {
1481 // If the argument has been promoted from a smaller type, insert an
1482 // assertion to capture this.
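  // For example, an i32 argument promoted to an i64 location arrives with an
  // AssertSext/AssertZext marker attached here and is truncated back to i32
  // just below.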
1483 if (VA.getLocInfo() == CCValAssign::SExt)
1485 DAG.getValueType(VA.getValVT()));
1486 else if (VA.getLocInfo() == CCValAssign::ZExt)
1488 DAG.getValueType(VA.getValVT()));
1489
1490 if (VA.isExtInLoc())
1491 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1492 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1493 // If this is a short vector argument loaded from the stack,
1494 // extend from i64 to full vector size and then bitcast.
1495 assert(VA.getLocVT() == MVT::i64);
1496 assert(VA.getValVT().isVector());
1497 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1498 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1499 } else
1500 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1501 return Value;
1502}
1503
1504// Value is a value of type VA.getValVT() that we need to copy into
1505// the location described by VA. Return a copy of Value converted to
1506// VA.getLocVT(). The caller is responsible for handling indirect values.
1508 CCValAssign &VA, SDValue Value) {
1509 switch (VA.getLocInfo()) {
1510 case CCValAssign::SExt:
1511 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1512 case CCValAssign::ZExt:
1513 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1514 case CCValAssign::AExt:
1515 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1516 case CCValAssign::BCvt: {
1517 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1518 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1519 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1520 // For an f32 vararg we need to first promote it to an f64 and then
1521 // bitcast it to an i64.
1522 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1523 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1524 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1525 ? MVT::v2i64
1526 : VA.getLocVT();
1527 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1528 // For ELF, this is a short vector argument to be stored to the stack,
1529 // bitcast to v2i64 and then extract first element.
1530 if (BitCastToType == MVT::v2i64)
1531 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1532 DAG.getConstant(0, DL, MVT::i32));
1533 return Value;
1534 }
1535 case CCValAssign::Full:
1536 return Value;
1537 default:
1538 llvm_unreachable("Unhandled getLocInfo()");
1539 }
1540}
1541
1543 SDLoc DL(In);
1544 SDValue Lo, Hi;
1545 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1546 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1547 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1548 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1549 DAG.getConstant(64, DL, MVT::i32)));
1550 } else {
1551 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1552 }
1553
1554 // FIXME: If v2i64 were a legal type, we could use it instead of
1555 // Untyped here. This might enable improved folding.
1556 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1557 MVT::Untyped, Hi, Lo);
1558 return SDValue(Pair, 0);
1559}
1560
1562 SDLoc DL(In);
1563 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1564 DL, MVT::i64, In);
1565 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1566 DL, MVT::i64, In);
1567
1568 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1569 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1570 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1571 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1572 DAG.getConstant(64, DL, MVT::i32));
1573 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1574 } else {
1575 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1576 }
1577}
1578
1580 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1581 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1582 EVT ValueVT = Val.getValueType();
1583 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1584 // Inline assembly operand.
1585 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1586 return true;
1587 }
1588
1589 return false;
1590}
1591
1593 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1594 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1595 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1596 // Inline assembly operand.
1597 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1598 return DAG.getBitcast(ValueVT, Res);
1599 }
1600
1601 return SDValue();
1602}
1603
1605 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1606 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1607 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1609 MachineFrameInfo &MFI = MF.getFrameInfo();
1611 SystemZMachineFunctionInfo *FuncInfo =
1613 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1614 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1615
1616 // Assign locations to all of the incoming arguments.
1618 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1619 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1620 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
1621
1622 unsigned NumFixedGPRs = 0;
1623 unsigned NumFixedFPRs = 0;
1624 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1625 SDValue ArgValue;
1626 CCValAssign &VA = ArgLocs[I];
1627 EVT LocVT = VA.getLocVT();
1628 if (VA.isRegLoc()) {
1629 // Arguments passed in registers
1630 const TargetRegisterClass *RC;
1631 switch (LocVT.getSimpleVT().SimpleTy) {
1632 default:
1633 // Integers smaller than i64 should be promoted to i64.
1634 llvm_unreachable("Unexpected argument type");
1635 case MVT::i32:
1636 NumFixedGPRs += 1;
1637 RC = &SystemZ::GR32BitRegClass;
1638 break;
1639 case MVT::i64:
1640 NumFixedGPRs += 1;
1641 RC = &SystemZ::GR64BitRegClass;
1642 break;
1643 case MVT::f32:
1644 NumFixedFPRs += 1;
1645 RC = &SystemZ::FP32BitRegClass;
1646 break;
1647 case MVT::f64:
1648 NumFixedFPRs += 1;
1649 RC = &SystemZ::FP64BitRegClass;
1650 break;
1651 case MVT::f128:
1652 NumFixedFPRs += 2;
1653 RC = &SystemZ::FP128BitRegClass;
1654 break;
1655 case MVT::v16i8:
1656 case MVT::v8i16:
1657 case MVT::v4i32:
1658 case MVT::v2i64:
1659 case MVT::v4f32:
1660 case MVT::v2f64:
1661 RC = &SystemZ::VR128BitRegClass;
1662 break;
1663 }
1664
1665 Register VReg = MRI.createVirtualRegister(RC);
1666 MRI.addLiveIn(VA.getLocReg(), VReg);
1667 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1668 } else {
1669 assert(VA.isMemLoc() && "Argument not register or memory");
1670
1671 // Create the frame index object for this incoming parameter.
1672 // FIXME: Pre-include call frame size in the offset, should not
1673 // need to manually add it here.
1674 int64_t ArgSPOffset = VA.getLocMemOffset();
1675 if (Subtarget.isTargetXPLINK64()) {
1676 auto &XPRegs =
1678 ArgSPOffset += XPRegs.getCallFrameSize();
1679 }
1680 int FI =
1681 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1682
1683 // Create the SelectionDAG nodes corresponding to a load
1684 // from this parameter. Unpromoted ints and floats are
1685 // passed as right-justified 8-byte values.
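      // E.g. an i32 or f32 argument occupies the high-addressed half of its
      // 8-byte slot, which is why 4 is added to the frame address below.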
1686 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1687 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1688 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1689 DAG.getIntPtrConstant(4, DL));
1690 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1692 }
1693
1694 // Convert the value of the argument register into the value that's
1695 // being passed.
1696 if (VA.getLocInfo() == CCValAssign::Indirect) {
1697 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1699 // If the original argument was split (e.g. i128), we need
1700 // to load all parts of it here (using the same address).
1701 unsigned ArgIndex = Ins[I].OrigArgIndex;
1702 assert (Ins[I].PartOffset == 0);
1703 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1704 CCValAssign &PartVA = ArgLocs[I + 1];
1705 unsigned PartOffset = Ins[I + 1].PartOffset;
1706 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1707 DAG.getIntPtrConstant(PartOffset, DL));
1708 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1710 ++I;
1711 }
1712 } else
1713 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1714 }
1715
1716 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
1717 // Save the number of non-varargs registers for later use by va_start, etc.
1718 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1719 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1720
1721 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1722 Subtarget.getSpecialRegisters());
1723
1724 // Likewise the address (in the form of a frame index) of where the
1725 // first stack vararg would be. The 1-byte size here is arbitrary.
1726 // FIXME: Pre-include call frame size in the offset, should not
1727 // need to manually add it here.
1728 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
1729 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
1730 FuncInfo->setVarArgsFrameIndex(FI);
1731 }
1732
1733 if (IsVarArg && Subtarget.isTargetELF()) {
1734 // Save the number of non-varargs registers for later use by va_start, etc.
1735 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1736 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1737
1738 // Likewise the address (in the form of a frame index) of where the
1739 // first stack vararg would be. The 1-byte size here is arbitrary.
1740 int64_t VarArgsOffset = CCInfo.getStackSize();
1741 FuncInfo->setVarArgsFrameIndex(
1742 MFI.CreateFixedObject(1, VarArgsOffset, true));
1743
1744 // ...and a similar frame index for the caller-allocated save area
1745 // that will be used to store the incoming registers.
1746 int64_t RegSaveOffset =
1747 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1748 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1749 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1750
1751 // Store the FPR varargs in the reserved frame slots. (We store the
1752 // GPRs as part of the prologue.)
1753 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1755 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1756 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1757 int FI =
1759 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1761 &SystemZ::FP64BitRegClass);
1762 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1763 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1765 }
1766 // Join the stores, which are independent of one another.
1767 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1768 ArrayRef(&MemOps[NumFixedFPRs],
1769 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
1770 }
1771 }
1772
1773 if (Subtarget.isTargetXPLINK64()) {
1774 // Create a virtual register for handling the incoming "ADA" special register (R5).
1775 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
1776 Register ADAvReg = MRI.createVirtualRegister(RC);
1777 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1778 Subtarget.getSpecialRegisters());
1779 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
1780 FuncInfo->setADAVirtualRegister(ADAvReg);
1781 }
1782 return Chain;
1783}
1784
1785static bool canUseSiblingCall(const CCState &ArgCCInfo,
1788 // Punt if there are any indirect or stack arguments, or if the call
1789 // needs the callee-saved argument register R6, or if the call uses
1790 // the callee-saved register arguments SwiftSelf and SwiftError.
1791 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1792 CCValAssign &VA = ArgLocs[I];
1794 return false;
1795 if (!VA.isRegLoc())
1796 return false;
1797 Register Reg = VA.getLocReg();
1798 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1799 return false;
1800 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1801 return false;
1802 }
1803 return true;
1804}
1805
1807 unsigned Offset, bool LoadAdr = false) {
1810 unsigned ADAvReg = MFI->getADAVirtualRegister();
1812
1813 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
1814 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
1815
1816 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
1817 if (!LoadAdr)
1818 Result = DAG.getLoad(
1819 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
1821
1822 return Result;
1823}
1824
1825// ADA access using a GlobalValue.
1826// Note: for functions, the address of the descriptor is returned.
1828 EVT PtrVT) {
1829 unsigned ADAtype;
1830 bool LoadAddr = false;
1831 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
1832 bool IsFunction =
1833 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
1834 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
1835
1836 if (IsFunction) {
1837 if (IsInternal) {
1839 LoadAddr = true;
1840 } else
1842 } else {
1844 }
1845 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
1846
1847 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
1848}
1849
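// Returns true when a direct (PC-relative) call to an internal function is
// possible; otherwise the ADA and entry-point address must be loaded from the
// callee's function descriptor.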
1850static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
1851 SDLoc &DL, SDValue &Chain) {
1852 unsigned ADADelta = 0; // ADA offset in desc.
1853 unsigned EPADelta = 8; // EPA offset in desc.
1856
1857 // XPLink calling convention.
1858 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1859 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
1860 G->getGlobal()->hasPrivateLinkage());
1861 if (IsInternal) {
1864 unsigned ADAvReg = MFI->getADAVirtualRegister();
1865 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
1866 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1867 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1868 return true;
1869 } else {
1871 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1872 ADA = getADAEntry(DAG, GA, DL, ADADelta);
1873 Callee = getADAEntry(DAG, GA, DL, EPADelta);
1874 }
1875 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1877 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1878 ADA = getADAEntry(DAG, ES, DL, ADADelta);
1879 Callee = getADAEntry(DAG, ES, DL, EPADelta);
1880 } else {
1881 // Function pointer case
1882 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1883 DAG.getConstant(ADADelta, DL, PtrVT));
1884 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
1886 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1887 DAG.getConstant(EPADelta, DL, PtrVT));
1888 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
1890 }
1891 return false;
1892}
1893
1894SDValue
1896 SmallVectorImpl<SDValue> &InVals) const {
1897 SelectionDAG &DAG = CLI.DAG;
1898 SDLoc &DL = CLI.DL;
1900 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1902 SDValue Chain = CLI.Chain;
1903 SDValue Callee = CLI.Callee;
1904 bool &IsTailCall = CLI.IsTailCall;
1905 CallingConv::ID CallConv = CLI.CallConv;
1906 bool IsVarArg = CLI.IsVarArg;
1908 EVT PtrVT = getPointerTy(MF.getDataLayout());
1909 LLVMContext &Ctx = *DAG.getContext();
1911
1912 // FIXME: z/OS support to be added later.
1913 if (Subtarget.isTargetXPLINK64())
1914 IsTailCall = false;
1915
1916 // Analyze the operands of the call, assigning locations to each operand.
1918 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1919 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1920
1921 // We don't support GuaranteedTailCallOpt, only automatically-detected
1922 // sibling calls.
1923 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1924 IsTailCall = false;
1925
1926 // Get a count of how many bytes are to be pushed on the stack.
1927 unsigned NumBytes = ArgCCInfo.getStackSize();
1928
1929 // Mark the start of the call.
1930 if (!IsTailCall)
1931 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1932
1933 // Copy argument values to their designated locations.
1935 SmallVector<SDValue, 8> MemOpChains;
1936 SDValue StackPtr;
1937 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1938 CCValAssign &VA = ArgLocs[I];
1939 SDValue ArgValue = OutVals[I];
1940
1941 if (VA.getLocInfo() == CCValAssign::Indirect) {
1942 // Store the argument in a stack slot and pass its address.
1943 unsigned ArgIndex = Outs[I].OrigArgIndex;
1944 EVT SlotVT;
1945 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1946 // Allocate the full stack space for a promoted (and split) argument.
1947 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1948 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1949 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1950 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1951 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1952 } else {
1953 SlotVT = Outs[I].VT;
1954 }
1955 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1956 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1957 MemOpChains.push_back(
1958 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1960 // If the original argument was split (e.g. i128), we need
1961 // to store all parts of it here (and pass just one address).
1962 assert (Outs[I].PartOffset == 0);
1963 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1964 SDValue PartValue = OutVals[I + 1];
1965 unsigned PartOffset = Outs[I + 1].PartOffset;
1966 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1967 DAG.getIntPtrConstant(PartOffset, DL));
1968 MemOpChains.push_back(
1969 DAG.getStore(Chain, DL, PartValue, Address,
1971 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1972 SlotVT.getStoreSize()) && "Not enough space for argument part!");
1973 ++I;
1974 }
1975 ArgValue = SpillSlot;
1976 } else
1977 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1978
1979 if (VA.isRegLoc()) {
1980 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcast to an
1981 // MVT::i128 type. We decompose the 128-bit value into a pair of its high
1982 // and low halves.
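      // (lowerI128ToGR128 above pairs up the high and low 64-bit halves into
      // a single untyped register-pair value.)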
1983 if (VA.getLocVT() == MVT::i128)
1984 ArgValue = lowerI128ToGR128(DAG, ArgValue);
1985 // Queue up the argument copies and emit them at the end.
1986 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1987 } else {
1988 assert(VA.isMemLoc() && "Argument not register or memory");
1989
1990 // Work out the address of the stack slot. Unpromoted ints and
1991 // floats are passed as right-justified 8-byte values.
1992 if (!StackPtr.getNode())
1993 StackPtr = DAG.getCopyFromReg(Chain, DL,
1994 Regs->getStackPointerRegister(), PtrVT);
1995 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
1996 VA.getLocMemOffset();
1997 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1998 Offset += 4;
1999 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2001
2002 // Emit the store.
2003 MemOpChains.push_back(
2004 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2005
2006 // Although long doubles or vectors are passed through the stack when
2007 // they are vararg (non-fixed arguments), if a long double or vector
2008 // occupies the third and fourth slots of the argument list, GPR3 should
2009 // still shadow the third slot of the argument list.
2010 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2011 SDValue ShadowArgValue =
2012 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2013 DAG.getIntPtrConstant(1, DL));
2014 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2015 }
2016 }
2017 }
2018
2019 // Join the stores, which are independent of one another.
2020 if (!MemOpChains.empty())
2021 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2022
2023 // Accept direct calls by converting symbolic call addresses to the
2024 // associated Target* opcodes. Force %r1 to be used for indirect
2025 // tail calls.
2026 SDValue Glue;
2027
2028 if (Subtarget.isTargetXPLINK64()) {
2029 SDValue ADA;
2030 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2031 if (!IsBRASL) {
2032 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2033 ->getAddressOfCalleeRegister();
2034 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2035 Glue = Chain.getValue(1);
2036 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2037 }
2038 RegsToPass.push_back(std::make_pair(
2039 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2040 } else {
2041 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2042 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2043 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2044 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2045 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2046 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2047 } else if (IsTailCall) {
2048 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2049 Glue = Chain.getValue(1);
2050 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2051 }
2052 }
2053
2054 // Build a sequence of copy-to-reg nodes, chained and glued together.
2055 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
2056 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
2057 RegsToPass[I].second, Glue);
2058 Glue = Chain.getValue(1);
2059 }
2060
2061 // The first call operand is the chain and the second is the target address.
2062 SmallVector<SDValue, 8> Ops;
2063 Ops.push_back(Chain);
2064 Ops.push_back(Callee);
2065
2066 // Add argument registers to the end of the list so that they are
2067 // known live into the call.
2068 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
2069 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
2070 RegsToPass[I].second.getValueType()));
2071
2072 // Add a register mask operand representing the call-preserved registers.
2073 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2074 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2075 assert(Mask && "Missing call preserved mask for calling convention");
2076 Ops.push_back(DAG.getRegisterMask(Mask));
2077
2078 // Glue the call to the argument copies, if any.
2079 if (Glue.getNode())
2080 Ops.push_back(Glue);
2081
2082 // Emit the call.
2083 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2084 if (IsTailCall) {
2085 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2086 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2087 return Ret;
2088 }
2089 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2090 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2091 Glue = Chain.getValue(1);
2092
2093 // Mark the end of the call, which is glued to the call itself.
2094 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2095 Glue = Chain.getValue(1);
2096
2097 // Assign locations to each value returned by this call.
2098 SmallVector<CCValAssign, 16> RetLocs;
2099 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2100 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2101
2102 // Copy all of the result registers out of their specified physreg.
2103 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2104 CCValAssign &VA = RetLocs[I];
2105
2106 // Copy the value out, gluing the copy to the end of the call sequence.
2107 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2108 VA.getLocVT(), Glue);
2109 Chain = RetValue.getValue(1);
2110 Glue = RetValue.getValue(2);
2111
2112 // Convert the value of the return register into the value that's
2113 // being returned.
2114 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2115 }
2116
2117 return Chain;
2118}
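// Editorial sketch (not part of the original file): the indirect-argument
// path above sizes its spill slot from the register parts of the original
// argument type.  As a hedged illustration, an i128 argument that the
// calling convention splits into two i64 parts would give roughly:
//
//   MVT PartVT = MVT::i64;                          // register part type
//   unsigned N = 2;                                 // number of parts
//   EVT SlotVT = EVT::getIntegerVT(Ctx, 64 * N);    // -> i128
//
// The first part is stored at offset 0, each later part at its PartOffset
// (8 for the second i64 here), and only the slot address is passed on.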
2119
2120// Generate a call taking the given operands as arguments and returning a
2121// result of type RetVT.
2122 std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
2123 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2124 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2125 bool DoesNotReturn, bool IsReturnValueUsed) const {
2126 TargetLowering::ArgListTy Args;
2127 Args.reserve(Ops.size());
2128
2129 TargetLowering::ArgListEntry Entry;
2130 for (SDValue Op : Ops) {
2131 Entry.Node = Op;
2132 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
2133 Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2134 Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2135 Args.push_back(Entry);
2136 }
2137
2138 SDValue Callee =
2139 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2140
2141 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2142 CallLoweringInfo CLI(DAG);
2143 bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
2144 CLI.setDebugLoc(DL)
2145 .setChain(Chain)
2146 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2147 .setNoReturn(DoesNotReturn)
2148 .setDiscardResult(!IsReturnValueUsed)
2149 .setSExtResult(SignExtend)
2150 .setZExtResult(!SignExtend);
2151 return LowerCallTo(CLI);
2152}
2153
2154 bool SystemZTargetLowering::
2155 CanLowerReturn(CallingConv::ID CallConv,
2156 MachineFunction &MF, bool isVarArg,
2157 const SmallVectorImpl<ISD::OutputArg> &Outs,
2158 LLVMContext &Context) const {
2159 // Special case that we cannot easily detect in RetCC_SystemZ since
2160 // i128 may not be a legal type.
2161 for (auto &Out : Outs)
2162 if (Out.ArgVT == MVT::i128)
2163 return false;
2164
2165 SmallVector<CCValAssign, 16> RetLocs;
2166 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
2167 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2168}
2169
2170SDValue
2171 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2172 bool IsVarArg,
2173 const SmallVectorImpl<ISD::OutputArg> &Outs,
2174 const SmallVectorImpl<SDValue> &OutVals,
2175 const SDLoc &DL, SelectionDAG &DAG) const {
2176 MachineFunction &MF = DAG.getMachineFunction();
2177
2178 // Assign locations to each returned value.
2179 SmallVector<CCValAssign, 16> RetLocs;
2180 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2181 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2182
2183 // Quick exit for void returns
2184 if (RetLocs.empty())
2185 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2186
2187 if (CallConv == CallingConv::GHC)
2188 report_fatal_error("GHC functions return void only");
2189
2190 // Copy the result values into the output registers.
2191 SDValue Glue;
2192 SmallVector<SDValue, 4> RetOps;
2193 RetOps.push_back(Chain);
2194 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2195 CCValAssign &VA = RetLocs[I];
2196 SDValue RetValue = OutVals[I];
2197
2198 // Make the return register live on exit.
2199 assert(VA.isRegLoc() && "Can only return in registers!");
2200
2201 // Promote the value as required.
2202 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2203
2204 // Chain and glue the copies together.
2205 Register Reg = VA.getLocReg();
2206 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2207 Glue = Chain.getValue(1);
2208 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2209 }
2210
2211 // Update chain and glue.
2212 RetOps[0] = Chain;
2213 if (Glue.getNode())
2214 RetOps.push_back(Glue);
2215
2216 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2217}
2218
2219// Return true if Op is an intrinsic node with chain that returns the CC value
2220// as its only (other) argument. Provide the associated SystemZISD opcode and
2221// the mask of valid CC values if so.
2222static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2223 unsigned &CCValid) {
2224 unsigned Id = Op.getConstantOperandVal(1);
2225 switch (Id) {
2226 case Intrinsic::s390_tbegin:
2227 Opcode = SystemZISD::TBEGIN;
2228 CCValid = SystemZ::CCMASK_TBEGIN;
2229 return true;
2230
2231 case Intrinsic::s390_tbegin_nofloat:
2232 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2233 CCValid = SystemZ::CCMASK_TBEGIN;
2234 return true;
2235
2236 case Intrinsic::s390_tend:
2237 Opcode = SystemZISD::TEND;
2238 CCValid = SystemZ::CCMASK_TEND;
2239 return true;
2240
2241 default:
2242 return false;
2243 }
2244}
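// Editorial note (not part of the original file): this classification lets a
// comparison of the intrinsic's CC result against a constant be folded into
// the CC-producing node itself.  For example (illustrative IR only):
//
//   %cc = call i32 @llvm.s390.tend()
//   %ok = icmp eq i32 %cc, 0
//
// is lowered so that the TEND node defines CC directly and the icmp becomes
// a CC test built by getIntrinsicCmp below, restricted to
// SystemZ::CCMASK_TEND.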
2245
2246// Return true if Op is an intrinsic node without chain that returns the
2247// CC value as its final argument. Provide the associated SystemZISD
2248// opcode and the mask of valid CC values if so.
2249static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2250 unsigned Id = Op.getConstantOperandVal(0);
2251 switch (Id) {
2252 case Intrinsic::s390_vpkshs:
2253 case Intrinsic::s390_vpksfs:
2254 case Intrinsic::s390_vpksgs:
2255 Opcode = SystemZISD::PACKS_CC;
2256 CCValid = SystemZ::CCMASK_VCMP;
2257 return true;
2258
2259 case Intrinsic::s390_vpklshs:
2260 case Intrinsic::s390_vpklsfs:
2261 case Intrinsic::s390_vpklsgs:
2262 Opcode = SystemZISD::PACKLS_CC;
2263 CCValid = SystemZ::CCMASK_VCMP;
2264 return true;
2265
2266 case Intrinsic::s390_vceqbs:
2267 case Intrinsic::s390_vceqhs:
2268 case Intrinsic::s390_vceqfs:
2269 case Intrinsic::s390_vceqgs:
2270 Opcode = SystemZISD::VICMPES;
2271 CCValid = SystemZ::CCMASK_VCMP;
2272 return true;
2273
2274 case Intrinsic::s390_vchbs:
2275 case Intrinsic::s390_vchhs:
2276 case Intrinsic::s390_vchfs:
2277 case Intrinsic::s390_vchgs:
2278 Opcode = SystemZISD::VICMPHS;
2279 CCValid = SystemZ::CCMASK_VCMP;
2280 return true;
2281
2282 case Intrinsic::s390_vchlbs:
2283 case Intrinsic::s390_vchlhs:
2284 case Intrinsic::s390_vchlfs:
2285 case Intrinsic::s390_vchlgs:
2286 Opcode = SystemZISD::VICMPHLS;
2287 CCValid = SystemZ::CCMASK_VCMP;
2288 return true;
2289
2290 case Intrinsic::s390_vtm:
2291 Opcode = SystemZISD::VTM;
2292 CCValid = SystemZ::CCMASK_VCMP;
2293 return true;
2294
2295 case Intrinsic::s390_vfaebs:
2296 case Intrinsic::s390_vfaehs:
2297 case Intrinsic::s390_vfaefs:
2298 Opcode = SystemZISD::VFAE_CC;
2299 CCValid = SystemZ::CCMASK_ANY;
2300 return true;
2301
2302 case Intrinsic::s390_vfaezbs:
2303 case Intrinsic::s390_vfaezhs:
2304 case Intrinsic::s390_vfaezfs:
2305 Opcode = SystemZISD::VFAEZ_CC;
2306 CCValid = SystemZ::CCMASK_ANY;
2307 return true;
2308
2309 case Intrinsic::s390_vfeebs:
2310 case Intrinsic::s390_vfeehs:
2311 case Intrinsic::s390_vfeefs:
2312 Opcode = SystemZISD::VFEE_CC;
2313 CCValid = SystemZ::CCMASK_ANY;
2314 return true;
2315
2316 case Intrinsic::s390_vfeezbs:
2317 case Intrinsic::s390_vfeezhs:
2318 case Intrinsic::s390_vfeezfs:
2319 Opcode = SystemZISD::VFEEZ_CC;
2320 CCValid = SystemZ::CCMASK_ANY;
2321 return true;
2322
2323 case Intrinsic::s390_vfenebs:
2324 case Intrinsic::s390_vfenehs:
2325 case Intrinsic::s390_vfenefs:
2326 Opcode = SystemZISD::VFENE_CC;
2327 CCValid = SystemZ::CCMASK_ANY;
2328 return true;
2329
2330 case Intrinsic::s390_vfenezbs:
2331 case Intrinsic::s390_vfenezhs:
2332 case Intrinsic::s390_vfenezfs:
2333 Opcode = SystemZISD::VFENEZ_CC;
2334 CCValid = SystemZ::CCMASK_ANY;
2335 return true;
2336
2337 case Intrinsic::s390_vistrbs:
2338 case Intrinsic::s390_vistrhs:
2339 case Intrinsic::s390_vistrfs:
2340 Opcode = SystemZISD::VISTR_CC;
2341 CCValid = SystemZ::CCMASK_ANY;
2342 return true;
2343
2344 case Intrinsic::s390_vstrcbs:
2345 case Intrinsic::s390_vstrchs:
2346 case Intrinsic::s390_vstrcfs:
2347 Opcode = SystemZISD::VSTRC_CC;
2348 CCValid = SystemZ::CCMASK_ANY;
2349 return true;
2350
2351 case Intrinsic::s390_vstrczbs:
2352 case Intrinsic::s390_vstrczhs:
2353 case Intrinsic::s390_vstrczfs:
2354 Opcode = SystemZISD::VSTRCZ_CC;
2355 CCValid = SystemZ::CCMASK_ANY;
2356 return true;
2357
2358 case Intrinsic::s390_vstrsb:
2359 case Intrinsic::s390_vstrsh:
2360 case Intrinsic::s390_vstrsf:
2361 Opcode = SystemZISD::VSTRS_CC;
2362 CCValid = SystemZ::CCMASK_ANY;
2363 return true;
2364
2365 case Intrinsic::s390_vstrszb:
2366 case Intrinsic::s390_vstrszh:
2367 case Intrinsic::s390_vstrszf:
2368 Opcode = SystemZISD::VSTRSZ_CC;
2369 CCValid = SystemZ::CCMASK_ANY;
2370 return true;
2371
2372 case Intrinsic::s390_vfcedbs:
2373 case Intrinsic::s390_vfcesbs:
2374 Opcode = SystemZISD::VFCMPES;
2375 CCValid = SystemZ::CCMASK_VCMP;
2376 return true;
2377
2378 case Intrinsic::s390_vfchdbs:
2379 case Intrinsic::s390_vfchsbs:
2380 Opcode = SystemZISD::VFCMPHS;
2381 CCValid = SystemZ::CCMASK_VCMP;
2382 return true;
2383
2384 case Intrinsic::s390_vfchedbs:
2385 case Intrinsic::s390_vfchesbs:
2386 Opcode = SystemZISD::VFCMPHES;
2387 CCValid = SystemZ::CCMASK_VCMP;
2388 return true;
2389
2390 case Intrinsic::s390_vftcidb:
2391 case Intrinsic::s390_vftcisb:
2392 Opcode = SystemZISD::VFTCI;
2393 CCValid = SystemZ::CCMASK_VCMP;
2394 return true;
2395
2396 case Intrinsic::s390_tdc:
2397 Opcode = SystemZISD::TDC;
2398 CCValid = SystemZ::CCMASK_TDC;
2399 return true;
2400
2401 default:
2402 return false;
2403 }
2404}
2405
2406// Emit an intrinsic with chain and an explicit CC register result.
2407 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2408 unsigned Opcode) {
2409 // Copy all operands except the intrinsic ID.
2410 unsigned NumOps = Op.getNumOperands();
2411 SmallVector<SDValue, 6> Ops;
2412 Ops.reserve(NumOps - 1);
2413 Ops.push_back(Op.getOperand(0));
2414 for (unsigned I = 2; I < NumOps; ++I)
2415 Ops.push_back(Op.getOperand(I));
2416
2417 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2418 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2419 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2420 SDValue OldChain = SDValue(Op.getNode(), 1);
2421 SDValue NewChain = SDValue(Intr.getNode(), 1);
2422 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2423 return Intr.getNode();
2424}
2425
2426// Emit an intrinsic with an explicit CC register result.
2427 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2428 unsigned Opcode) {
2429 // Copy all operands except the intrinsic ID.
2430 unsigned NumOps = Op.getNumOperands();
2431 SmallVector<SDValue, 6> Ops;
2432 Ops.reserve(NumOps - 1);
2433 for (unsigned I = 1; I < NumOps; ++I)
2434 Ops.push_back(Op.getOperand(I));
2435
2436 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2437 return Intr.getNode();
2438}
2439
2440// CC is a comparison that will be implemented using an integer or
2441// floating-point comparison. Return the condition code mask for
2442// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2443// unsigned comparisons and clear for signed ones. In the floating-point
2444// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2445 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2446 #define CONV(X) \
2447 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2448 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2449 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2450
2451 switch (CC) {
2452 default:
2453 llvm_unreachable("Invalid integer condition!");
2454
2455 CONV(EQ);
2456 CONV(NE);
2457 CONV(GT);
2458 CONV(GE);
2459 CONV(LT);
2460 CONV(LE);
2461
2462 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2463 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2464 }
2465#undef CONV
2466}
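// Editorial note (not part of the original file): a few sample mappings
// produced by CCMaskForCondCode.  For integer comparisons the CCMASK_CMP_UO
// bit only marks the comparison as unsigned and is stripped again in getCmp:
//
//   ISD::SETLT  -> SystemZ::CCMASK_CMP_LT
//   ISD::SETOLT -> SystemZ::CCMASK_CMP_LT
//   ISD::SETULT -> SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_LT
//   ISD::SETO   -> SystemZ::CCMASK_CMP_O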
2467
2468// If C can be converted to a comparison against zero, adjust the operands
2469// as necessary.
2470static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2471 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2472 return;
2473
2474 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2475 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2476 return;
2477
2478 int64_t Value = ConstOp1->getSExtValue();
2479 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2480 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2481 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2482 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2483 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2484 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2485 }
2486}
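// Editorial note (not part of the original file): a worked example of the
// rewrite above.  A signed "x > -1" arrives with Op1 == -1 and
// CCMask == CCMASK_CMP_GT; XOR-ing in CCMASK_CMP_EQ turns the mask into
// CCMASK_CMP_GE and Op1 into 0, i.e. the equivalent "x >= 0", which a
// compare against zero implements directly.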
2487
2488// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2489// adjust the operands as necessary.
2490static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2491 Comparison &C) {
2492 // For us to make any changes, it must be a comparison between a single-use
2493 // load and a constant.
2494 if (!C.Op0.hasOneUse() ||
2495 C.Op0.getOpcode() != ISD::LOAD ||
2496 C.Op1.getOpcode() != ISD::Constant)
2497 return;
2498
2499 // We must have an 8- or 16-bit load.
2500 auto *Load = cast<LoadSDNode>(C.Op0);
2501 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2502 if ((NumBits != 8 && NumBits != 16) ||
2503 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2504 return;
2505
2506 // The load must be an extending one and the constant must be within the
2507 // range of the unextended value.
2508 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2509 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2510 return;
2511 uint64_t Value = ConstOp1->getZExtValue();
2512 uint64_t Mask = (1 << NumBits) - 1;
2513 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2514 // Make sure that ConstOp1 is in range of C.Op0.
2515 int64_t SignedValue = ConstOp1->getSExtValue();
2516 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2517 return;
2518 if (C.ICmpType != SystemZICMP::SignedOnly) {
2519 // Unsigned comparison between two sign-extended values is equivalent
2520 // to unsigned comparison between two zero-extended values.
2521 Value &= Mask;
2522 } else if (NumBits == 8) {
2523 // Try to treat the comparison as unsigned, so that we can use CLI.
2524 // Adjust CCMask and Value as necessary.
2525 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2526 // Test whether the high bit of the byte is set.
2527 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2528 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2529 // Test whether the high bit of the byte is clear.
2530 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2531 else
2532 // No instruction exists for this combination.
2533 return;
2534 C.ICmpType = SystemZICMP::UnsignedOnly;
2535 }
2536 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2537 if (Value > Mask)
2538 return;
2539 // If the constant is in range, we can use any comparison.
2540 C.ICmpType = SystemZICMP::Any;
2541 } else
2542 return;
2543
2544 // Make sure that the first operand is an i32 of the right extension type.
2545 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2546 ISD::SEXTLOAD :
2547 ISD::ZEXTLOAD);
2548 if (C.Op0.getValueType() != MVT::i32 ||
2549 Load->getExtensionType() != ExtType) {
2550 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2551 Load->getBasePtr(), Load->getPointerInfo(),
2552 Load->getMemoryVT(), Load->getAlign(),
2553 Load->getMemOperand()->getFlags());
2554 // Update the chain uses.
2555 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2556 }
2557
2558 // Make sure that the second operand is an i32 with the right value.
2559 if (C.Op1.getValueType() != MVT::i32 ||
2560 Value != ConstOp1->getZExtValue())
2561 C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2562}
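// Editorial note (not part of the original file): for a sign-extending i8
// load compared with a constant under a signed-only condition, the code
// above rewrites the test so the unsigned CLI instruction still applies.
// For example "(sext i8 load) < 0" (Value 0, CCMASK_CMP_LT) tests the sign
// bit of the byte and becomes the unsigned "byte > 127" (Value 127,
// CCMASK_CMP_GT); "(sext i8 load) >= 0" likewise becomes "byte < 128".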
2563
2564// Return true if Op is either an unextended load, or a load suitable
2565// for integer register-memory comparisons of type ICmpType.
2566static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2567 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2568 if (Load) {
2569 // There are no instructions to compare a register with a memory byte.
2570 if (Load->getMemoryVT() == MVT::i8)
2571 return false;
2572 // Otherwise decide on extension type.
2573 switch (Load->getExtensionType()) {
2574 case ISD::NON_EXTLOAD:
2575 return true;
2576 case ISD::SEXTLOAD:
2577 return ICmpType != SystemZICMP::UnsignedOnly;
2578 case ISD::ZEXTLOAD:
2579 return ICmpType != SystemZICMP::SignedOnly;
2580 default:
2581 break;
2582 }
2583 }
2584 return false;
2585}
2586
2587// Return true if it is better to swap the operands of C.
2588static bool shouldSwapCmpOperands(const Comparison &C) {
2589 // Leave i128 and f128 comparisons alone, since they have no memory forms.
2590 if (C.Op0.getValueType() == MVT::i128)
2591 return false;
2592 if (C.Op0.getValueType() == MVT::f128)
2593 return false;
2594
2595 // Always keep a floating-point constant second, since comparisons with
2596 // zero can use LOAD TEST and comparisons with other constants make a
2597 // natural memory operand.
2598 if (isa<ConstantFPSDNode>(C.Op1))
2599 return false;
2600
2601 // Never swap comparisons with zero since there are many ways to optimize
2602 // those later.
2603 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2604 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2605 return false;
2606
2607 // Also keep natural memory operands second if the loaded value is
2608 // only used here. Several comparisons have memory forms.
2609 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2610 return false;
2611
2612 // Look for cases where Op0 is a single-use load and Op1 isn't.
2613 // In that case we generally prefer the memory to be second.
2614 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2615 // The only exceptions are when the second operand is a constant and
2616 // we can use things like CHHSI.
2617 if (!ConstOp1)
2618 return true;
2619 // The unsigned memory-immediate instructions can handle 16-bit
2620 // unsigned integers.
2621 if (C.ICmpType != SystemZICMP::SignedOnly &&
2622 isUInt<16>(ConstOp1->getZExtValue()))
2623 return false;
2624 // The signed memory-immediate instructions can handle 16-bit
2625 // signed integers.
2626 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2627 isInt<16>(ConstOp1->getSExtValue()))
2628 return false;
2629 return true;
2630 }
2631
2632 // Try to promote the use of CGFR and CLGFR.
2633 unsigned Opcode0 = C.Op0.getOpcode();
2634 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2635 return true;
2636 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2637 return true;
2638 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
2639 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2640 C.Op0.getConstantOperandVal(1) == 0xffffffff)
2641 return true;
2642
2643 return false;
2644}
2645
2646// Check whether C tests for equality between X and Y and whether X - Y
2647// or Y - X is also computed. In that case it's better to compare the
2648// result of the subtraction against zero.
2649 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2650 Comparison &C) {
2651 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2652 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2653 for (SDNode *N : C.Op0->uses()) {
2654 if (N->getOpcode() == ISD::SUB &&
2655 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2656 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2657 // Disable the nsw and nuw flags: the backend needs to handle
2658 // overflow as well during comparison elimination.
2659 SDNodeFlags Flags = N->getFlags();
2660 Flags.setNoSignedWrap(false);
2661 Flags.setNoUnsignedWrap(false);
2662 N->setFlags(Flags);
2663 C.Op0 = SDValue(N, 0);
2664 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2665 return;
2666 }
2667 }
2668 }
2669}
2670
2671// Check whether C compares a floating-point value with zero and if that
2672// floating-point value is also negated. In this case we can use the
2673// negation to set CC, so avoiding separate LOAD AND TEST and
2674// LOAD (NEGATIVE/COMPLEMENT) instructions.
2675static void adjustForFNeg(Comparison &C) {
2676 // This optimization is invalid for strict comparisons, since FNEG
2677 // does not raise any exceptions.
2678 if (C.Chain)
2679 return;
2680 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2681 if (C1 && C1->isZero()) {
2682 for (SDNode *N : C.Op0->uses()) {
2683 if (N->getOpcode() == ISD::FNEG) {
2684 C.Op0 = SDValue(N, 0);
2685 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2686 return;
2687 }
2688 }
2689 }
2690}
2691
2692// Check whether C compares (shl X, 32) with 0 and whether X is
2693// also sign-extended. In that case it is better to test the result
2694// of the sign extension using LTGFR.
2695//
2696// This case is important because InstCombine transforms a comparison
2697// with (sext (trunc X)) into a comparison with (shl X, 32).
2698static void adjustForLTGFR(Comparison &C) {
2699 // Check for a comparison between (shl X, 32) and 0.
2700 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
2701 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
2702 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2703 if (C1 && C1->getZExtValue() == 32) {
2704 SDValue ShlOp0 = C.Op0.getOperand(0);
2705 // See whether X has any SIGN_EXTEND_INREG uses.
2706 for (SDNode *N : ShlOp0->uses()) {
2707 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2708 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2709 C.Op0 = SDValue(N, 0);
2710 return;
2711 }
2712 }
2713 }
2714 }
2715}
2716
2717// If C compares the truncation of an extending load, try to compare
2718// the untruncated value instead. This exposes more opportunities to
2719// reuse CC.
2720static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2721 Comparison &C) {
2722 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2723 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2724 C.Op1.getOpcode() == ISD::Constant &&
2725 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
2726 C.Op1->getAsZExtVal() == 0) {
2727 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2728 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
2729 C.Op0.getValueSizeInBits().getFixedValue()) {
2730 unsigned Type = L->getExtensionType();
2731 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2732 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2733 C.Op0 = C.Op0.getOperand(0);
2734 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2735 }
2736 }
2737 }
2738}
2739
2740// Return true if shift operation N has an in-range constant shift value.
2741// Store it in ShiftVal if so.
2742static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2743 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2744 if (!Shift)
2745 return false;
2746
2747 uint64_t Amount = Shift->getZExtValue();
2748 if (Amount >= N.getValueSizeInBits())
2749 return false;
2750
2751 ShiftVal = Amount;
2752 return true;
2753}
2754
2755// Check whether an AND with Mask is suitable for a TEST UNDER MASK
2756// instruction and whether the CC value is descriptive enough to handle
2757// a comparison of type Opcode between the AND result and CmpVal.
2758// CCMask says which comparison result is being tested and BitSize is
2759// the number of bits in the operands. If TEST UNDER MASK can be used,
2760// return the corresponding CC mask, otherwise return 0.
2761static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2762 uint64_t Mask, uint64_t CmpVal,
2763 unsigned ICmpType) {
2764 assert(Mask != 0 && "ANDs with zero should have been removed by now");
2765
2766 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2767 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2768 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2769 return 0;
2770
2771 // Work out the masks for the lowest and highest bits.
2772 uint64_t High = uint64_t(1) << (63 - llvm::countl_zero(Mask));
2773 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
2774
2775 // Signed ordered comparisons are effectively unsigned if the sign
2776 // bit is dropped.
2777 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2778
2779 // Check for equality comparisons with 0, or the equivalent.
2780 if (CmpVal == 0) {
2781 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2782 return SystemZ::CCMASK_TM_ALL_0;
2783 if (CCMask == SystemZ::CCMASK_CMP_NE)
2784 return SystemZ::CCMASK_TM_SOME_1;
2785 }
2786 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2787 if (CCMask == SystemZ::CCMASK_CMP_LT)
2788 return SystemZ::CCMASK_TM_ALL_0;
2789 if (CCMask == SystemZ::CCMASK_CMP_GE)
2790 return SystemZ::CCMASK_TM_SOME_1;
2791 }
2792 if (EffectivelyUnsigned && CmpVal < Low) {
2793 if (CCMask == SystemZ::CCMASK_CMP_LE)
2794 return SystemZ::CCMASK_TM_ALL_0;
2795 if (CCMask == SystemZ::CCMASK_CMP_GT)
2796 return SystemZ::CCMASK_TM_SOME_1;
2797 }
2798
2799 // Check for equality comparisons with the mask, or the equivalent.
2800 if (CmpVal == Mask) {
2801 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2802 return SystemZ::CCMASK_TM_ALL_1;
2803 if (CCMask == SystemZ::CCMASK_CMP_NE)
2804 return SystemZ::CCMASK_TM_SOME_0;
2805 }
2806 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2807 if (CCMask == SystemZ::CCMASK_CMP_GT)
2808 return SystemZ::CCMASK_TM_ALL_1;
2809 if (CCMask == SystemZ::CCMASK_CMP_LE)
2810 return SystemZ::CCMASK_TM_SOME_0;
2811 }
2812 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2813 if (CCMask == SystemZ::CCMASK_CMP_GE)
2814 return SystemZ::CCMASK_TM_ALL_1;
2815 if (CCMask == SystemZ::CCMASK_CMP_LT)
2816 return SystemZ::CCMASK_TM_SOME_0;
2817 }
2818
2819 // Check for ordered comparisons with the top bit.
2820 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2821 if (CCMask == SystemZ::CCMASK_CMP_LE)
2822 return SystemZ::CCMASK_TM_MSB_0;
2823 if (CCMask == SystemZ::CCMASK_CMP_GT)
2824 return SystemZ::CCMASK_TM_MSB_1;
2825 }
2826 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2827 if (CCMask == SystemZ::CCMASK_CMP_LT)
2828 return SystemZ::CCMASK_TM_MSB_0;
2829 if (CCMask == SystemZ::CCMASK_CMP_GE)
2830 return SystemZ::CCMASK_TM_MSB_1;
2831 }
2832
2833 // If there are just two bits, we can do equality checks for Low and High
2834 // as well.
2835 if (Mask == Low + High) {
2836 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2837 return SystemZ::CCMASK_TM_MIXED_MSB_0;
2838 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2839 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2840 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2841 return SystemZ::CCMASK_TM_MIXED_MSB_1;
2842 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2843 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2844 }
2845
2846 // Looks like we've exhausted our options.
2847 return 0;
2848}
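// Editorial note (not part of the original file): a worked example of the
// mapping above.  For Mask = 0x00F0, Low = 0x0010 and High = 0x0080.
// Comparing (x & 0x00F0) with 0 under CCMASK_CMP_EQ yields CCMASK_TM_ALL_0
// ("all selected bits are zero"), and comparing it with 0x00F0 under
// CCMASK_CMP_EQ yields CCMASK_TM_ALL_1; adjustForTestUnderMask below then
// folds the AND into a single TEST UNDER MASK (TMLL in this case).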
2849
2850// See whether C can be implemented as a TEST UNDER MASK instruction.
2851// Update the arguments with the TM version if so.
2852 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2853 Comparison &C) {
2854 // Use VECTOR TEST UNDER MASK for i128 operations.
2855 if (C.Op0.getValueType() == MVT::i128) {
2856 // We can use VTM for EQ/NE comparisons of x & y against 0.
2857 if (C.Op0.getOpcode() == ISD::AND &&
2858 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2859 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
2860 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
2861 if (Mask && Mask->getAPIntValue() == 0) {
2862 C.Opcode = SystemZISD::VTM;
2863 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
2864 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
2865 C.CCValid = SystemZ::CCMASK_VCMP;
2866 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2867 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2868 else
2869 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2870 }
2871 }
2872 return;
2873 }
2874
2875 // Check that we have a comparison with a constant.
2876 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2877 if (!ConstOp1)
2878 return;
2879 uint64_t CmpVal = ConstOp1->getZExtValue();
2880
2881 // Check whether the nonconstant input is an AND with a constant mask.
2882 Comparison NewC(C);
2883 uint64_t MaskVal;
2884 ConstantSDNode *Mask = nullptr;
2885 if (C.Op0.getOpcode() == ISD::AND) {
2886 NewC.Op0 = C.Op0.getOperand(0);
2887 NewC.Op1 = C.Op0.getOperand(1);
2888 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2889 if (!Mask)
2890 return;
2891 MaskVal = Mask->getZExtValue();
2892 } else {
2893 // There is no instruction to compare with a 64-bit immediate
2894 // so use TMHH instead if possible. We need an unsigned ordered
2895 // comparison with an i64 immediate.
2896 if (NewC.Op0.getValueType() != MVT::i64 ||
2897 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2898 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2899 NewC.ICmpType == SystemZICMP::SignedOnly)
2900 return;
2901 // Convert LE and GT comparisons into LT and GE.
2902 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2903 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2904 if (CmpVal == uint64_t(-1))
2905 return;
2906 CmpVal += 1;
2907 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2908 }
2909 // If the low N bits of Op1 are zero then the low N bits of Op0 can
2910 // be masked off without changing the result.
2911 MaskVal = -(CmpVal & -CmpVal);
2912 NewC.ICmpType = SystemZICMP::UnsignedOnly;
2913 }
2914 if (!MaskVal)
2915 return;
2916
2917 // Check whether the combination of mask, comparison value and comparison
2918 // type are suitable.
2919 unsigned BitSize = NewC.Op0.getValueSizeInBits();
2920 unsigned NewCCMask, ShiftVal;
2921 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2922 NewC.Op0.getOpcode() == ISD::SHL &&
2923 isSimpleShift(NewC.Op0, ShiftVal) &&
2924 (MaskVal >> ShiftVal != 0) &&
2925 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2926 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2927 MaskVal >> ShiftVal,
2928 CmpVal >> ShiftVal,
2929 SystemZICMP::Any))) {
2930 NewC.Op0 = NewC.Op0.getOperand(0);
2931 MaskVal >>= ShiftVal;
2932 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2933 NewC.Op0.getOpcode() == ISD::SRL &&
2934 isSimpleShift(NewC.Op0, ShiftVal) &&
2935 (MaskVal << ShiftVal != 0) &&
2936 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2937 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2938 MaskVal << ShiftVal,
2939 CmpVal << ShiftVal,
2940 SystemZICMP::UnsignedOnly))) {
2941 NewC.Op0 = NewC.Op0.getOperand(0);
2942 MaskVal <<= ShiftVal;
2943 } else {
2944 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2945 NewC.ICmpType);
2946 if (!NewCCMask)
2947 return;
2948 }
2949
2950 // Go ahead and make the change.
2951 C.Opcode = SystemZISD::TM;
2952 C.Op0 = NewC.Op0;
2953 if (Mask && Mask->getZExtValue() == MaskVal)
2954 C.Op1 = SDValue(Mask, 0);
2955 else
2956 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2957 C.CCValid = SystemZ::CCMASK_TM;
2958 C.CCMask = NewCCMask;
2959}
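// Editorial note (not part of the original file): the 64-bit immediate
// fallback above can be illustrated with the unsigned comparison
// "x >= 0x0001000000000000".  The lowest set bit of CmpVal is 1 << 48, so
// MaskVal = -(CmpVal & -CmpVal) = 0xFFFF000000000000, which isImmHH accepts,
// and getTestUnderMaskCond returns CCMASK_TM_SOME_1: x is at least 2^48
// exactly when one of its top 16 bits is set, so a single TMHH suffices.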
2960
2961// Implement i128 comparison in vector registers.
2962static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
2963 Comparison &C) {
2964 if (C.Opcode != SystemZISD::ICMP)
2965 return;
2966 if (C.Op0.getValueType() != MVT::i128)
2967 return;
2968
2969 // (In-)Equality comparisons can be implemented via VCEQGS.
2970 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2971 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2972 C.Opcode = SystemZISD::VICMPES;
2973 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
2974 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
2975 C.CCValid = SystemZ::CCMASK_VCMP;
2976 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2977 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2978 else
2979 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2980 return;
2981 }
2982
2983 // Normalize other comparisons to GT.
2984 bool Swap = false, Invert = false;
2985 switch (C.CCMask) {
2986 case SystemZ::CCMASK_CMP_GT: break;
2987 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
2988 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
2989 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
2990 default: llvm_unreachable("Invalid integer condition!");
2991 }
2992 if (Swap)
2993 std::swap(C.Op0, C.Op1);
2994
2995 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2996 C.Opcode = SystemZISD::UCMP128HI;
2997 else
2998 C.Opcode = SystemZISD::SCMP128HI;
2999 C.CCValid = SystemZ::CCMASK_ANY;
3000 C.CCMask = SystemZ::CCMASK_1;
3001
3002 if (Invert)
3003 C.CCMask ^= C.CCValid;
3004}
3005
3006// See whether the comparison argument contains a redundant AND
3007// and remove it if so. This sometimes happens due to the generic
3008// BRCOND expansion.
3009 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
3010 Comparison &C) {
3011 if (C.Op0.getOpcode() != ISD::AND)
3012 return;
3013 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3014 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3015 return;
3016 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3017 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3018 return;
3019
3020 C.Op0 = C.Op0.getOperand(0);
3021}
3022
3023// Return a Comparison that tests the condition-code result of intrinsic
3024// node Call against constant integer CC using comparison code Cond.
3025// Opcode is the opcode of the SystemZISD operation for the intrinsic
3026// and CCValid is the set of possible condition-code results.
3027static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3028 SDValue Call, unsigned CCValid, uint64_t CC,
3029 ISD::CondCode Cond) {
3030 Comparison C(Call, SDValue(), SDValue());
3031 C.Opcode = Opcode;
3032 C.CCValid = CCValid;
3033 if (Cond == ISD::SETEQ)
3034 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3035 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3036 else if (Cond == ISD::SETNE)
3037 // ...and the inverse of that.
3038 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3039 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3040 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3041 // always true for CC>3.
3042 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3043 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3044 // ...and the inverse of that.
3045 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3046 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3047 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3048 // always true for CC>3.
3049 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3050 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3051 // ...and the inverse of that.
3052 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3053 else
3054 llvm_unreachable("Unexpected integer comparison type");
3055 C.CCMask &= CCValid;
3056 return C;
3057}
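// Editorial sketch (not part of the original file): the mask built above
// uses one bit per condition-code value, bit 3 for CC==0 down to bit 0 for
// CC==3.  A standalone model of the SETEQ case, where CC is the constant
// the intrinsic result is compared against:
//
//   unsigned ccEqMask(uint64_t CC) {
//     return CC < 4 ? 1u << (3 - CC) : 0; // CC==0 -> 8, CC==3 -> 1, CC>3 -> 0
//   }
//
// The final "C.CCMask &= CCValid" drops any bits the intrinsic cannot
// produce.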
3058
3059 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3060static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3061 ISD::CondCode Cond, const SDLoc &DL,
3062 SDValue Chain = SDValue(),
3063 bool IsSignaling = false) {
3064 if (CmpOp1.getOpcode() == ISD::Constant) {
3065 assert(!Chain);
3066 unsigned Opcode, CCValid;
3067 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3068 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3069 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3070 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3071 CmpOp1->getAsZExtVal(), Cond);
3072 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3073 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3074 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3075 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3076 CmpOp1->getAsZExtVal(), Cond);
3077 }
3078 Comparison C(CmpOp0, CmpOp1, Chain);
3079 C.CCMask = CCMaskForCondCode(Cond);
3080 if (C.Op0.getValueType().isFloatingPoint()) {
3081 C.CCValid = SystemZ::CCMASK_FCMP;
3082 if (!C.Chain)
3083 C.Opcode = SystemZISD::FCMP;
3084 else if (!IsSignaling)
3085 C.Opcode = SystemZISD::STRICT_FCMP;
3086 else
3087 C.Opcode = SystemZISD::STRICT_FCMPS;
3088 adjustForFNeg(C);
3089 } else {
3090 assert(!C.Chain);
3091 C.CCValid = SystemZ::CCMASK_ICMP;
3092 C.Opcode = SystemZISD::ICMP;
3093 // Choose the type of comparison. Equality and inequality tests can
3094 // use either signed or unsigned comparisons. The choice also doesn't
3095 // matter if both sign bits are known to be clear. In those cases we
3096 // want to give the main isel code the freedom to choose whichever
3097 // form fits best.
3098 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3099 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3100 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3101 C.ICmpType = SystemZICMP::Any;
3102 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3103 C.ICmpType = SystemZICMP::UnsignedOnly;
3104 else
3105 C.ICmpType = SystemZICMP::SignedOnly;
3106 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3107 adjustForRedundantAnd(DAG, DL, C);
3108 adjustZeroCmp(DAG, DL, C);
3109 adjustSubwordCmp(DAG, DL, C);
3110 adjustForSubtraction(DAG, DL, C);
3111 adjustForLTGFR(C);
3112 adjustICmpTruncate(DAG, DL, C);
3113 }
3114
3115 if (shouldSwapCmpOperands(C)) {
3116 std::swap(C.Op0, C.Op1);
3117 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3118 }
3119
3120 adjustForTestUnderMask(DAG, DL, C);
3121 adjustICmp128(DAG, DL, C);
3122 return C;
3123}
3124
3125// Emit the comparison instruction described by C.
3126static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3127 if (!C.Op1.getNode()) {
3128 SDNode *Node;
3129 switch (C.Op0.getOpcode()) {
3130 case ISD::INTRINSIC_W_CHAIN:
3131 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3132 return SDValue(Node, 0);
3133 case ISD::INTRINSIC_WO_CHAIN:
3134 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3135 return SDValue(Node, Node->getNumValues() - 1);
3136 default:
3137 llvm_unreachable("Invalid comparison operands");
3138 }
3139 }
3140 if (C.Opcode == SystemZISD::ICMP)
3141 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3142 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3143 if (C.Opcode == SystemZISD::TM) {
3144 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3145 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
3146 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3147 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3148 }
3149 if (C.Opcode == SystemZISD::VICMPES) {
3150 SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32);
3151 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3152 return SDValue(Val.getNode(), 1);
3153 }
3154 if (C.Chain) {
3155 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3156 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3157 }
3158 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3159}
3160
3161// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3162// 64 bits. Extend is the extension type to use. Store the high part
3163// in Hi and the low part in Lo.
3164static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3165 SDValue Op0, SDValue Op1, SDValue &Hi,
3166 SDValue &Lo) {
3167 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3168 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3169 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3170 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3171 DAG.getConstant(32, DL, MVT::i64));
3172 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3173 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3174}
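// Editorial sketch (not part of the original file): the scalar equivalent of
// the lowering above for the unsigned case (Extend == ISD::ZERO_EXTEND):
//
//   void mulLoHi32(uint32_t A, uint32_t B, uint32_t &Hi, uint32_t &Lo) {
//     uint64_t Prod = uint64_t(A) * uint64_t(B); // extend, 64-bit multiply
//     Hi = uint32_t(Prod >> 32);                 // SRL by 32, then truncate
//     Lo = uint32_t(Prod);                       // plain truncate
//   }
//
// e.g. A = 0xFFFFFFFF, B = 2 gives Hi = 1 and Lo = 0xFFFFFFFE.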
3175
3176// Lower a binary operation that produces two VT results, one in each
3177// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3178// and Opcode performs the GR128 operation. Store the even register result
3179// in Even and the odd register result in Odd.
3180static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3181 unsigned Opcode, SDValue Op0, SDValue Op1,
3182 SDValue &Even, SDValue &Odd) {
3183 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3184 bool Is32Bit = is32Bit(VT);
3185 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3186 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3187}
3188
3189// Return an i32 value that is 1 if the CC value produced by CCReg is
3190// in the mask CCMask and 0 otherwise. CC is known to have a value
3191// in CCValid, so other values can be ignored.
3192static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3193 unsigned CCValid, unsigned CCMask) {
3194 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3195 DAG.getConstant(0, DL, MVT::i32),
3196 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3197 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3198 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3199}
3200
3201 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3202// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3203// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3204// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3205// floating-point comparisons.
3206 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
3207 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
3208 switch (CC) {
3209 case ISD::SETOEQ:
3210 case ISD::SETEQ:
3211 switch (Mode) {
3212 case CmpMode::Int: return SystemZISD::VICMPE;
3213 case CmpMode::FP: return SystemZISD::VFCMPE;
3214 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3215 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3216 }
3217 llvm_unreachable("Bad mode");
3218
3219 case ISD::SETOGE:
3220 case ISD::SETGE:
3221 switch (Mode) {
3222 case CmpMode::Int: return 0;
3223 case CmpMode::FP: return SystemZISD::VFCMPHE;
3224 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3225 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3226 }
3227 llvm_unreachable("Bad mode");
3228
3229 case ISD::SETOGT:
3230 case ISD::SETGT:
3231 switch (Mode) {
3232 case CmpMode::Int: return SystemZISD::VICMPH;
3233 case CmpMode::FP: return SystemZISD::VFCMPH;
3234 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3235 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3236 }
3237 llvm_unreachable("Bad mode");
3238
3239 case ISD::SETUGT:
3240 switch (Mode) {
3241 case CmpMode::Int: return SystemZISD::VICMPHL;
3242 case CmpMode::FP: return 0;
3243 case CmpMode::StrictFP: return 0;
3244 case CmpMode::SignalingFP: return 0;
3245 }
3246 llvm_unreachable("Bad mode");
3247
3248 default:
3249 return 0;
3250 }
3251}
3252
3253// Return the SystemZISD vector comparison operation for CC or its inverse,
3254// or 0 if neither can be done directly. Indicate in Invert whether the
3255// result is for the inverse of CC. Mode is as above.
3256 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3257 bool &Invert) {
3258 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3259 Invert = false;
3260 return Opcode;
3261 }
3262
3263 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3264 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3265 Invert = true;
3266 return Opcode;
3267 }
3268
3269 return 0;
3270}
3271
3272// Return a v2f64 that contains the extended form of elements Start and Start+1
3273// of v4f32 value Op. If Chain is nonnull, return the strict form.
3274static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3275 SDValue Op, SDValue Chain) {
3276 int Mask[] = { Start, -1, Start + 1, -1 };
3277 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3278 if (Chain) {
3279 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3280 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3281 }
3282 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3283}
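// Editorial note (not part of the original file): for Start == 0 the shuffle
// mask is { 0, -1, 1, -1 }, which places elements 0 and 1 of the v4f32 input
// in the even lanes (odd lanes undef); VEXTEND then widens exactly those
// even f32 lanes into the two f64 lanes of the result.  Start == 2 does the
// same for elements 2 and 3.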
3284
3285// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3286// producing a result of type VT. If Chain is nonnull, return the strict form.
3287SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3288 const SDLoc &DL, EVT VT,
3289 SDValue CmpOp0,
3290 SDValue CmpOp1,
3291 SDValue Chain) const {
3292 // There is no hardware support for v4f32 (unless we have the vector
3293 // enhancements facility 1), so extend the vector into two v2f64s
3294 // and compare those.
3295 if (CmpOp0.getValueType() == MVT::v4f32 &&
3296 !Subtarget.hasVectorEnhancements1()) {
3297 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3298 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3299 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3300 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3301 if (Chain) {
3302 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3303 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3304 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3305 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3306 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3307 H1.getValue(1), L1.getValue(1),
3308 HRes.getValue(1), LRes.getValue(1) };
3309 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3310 SDValue Ops[2] = { Res, NewChain };
3311 return DAG.getMergeValues(Ops, DL);
3312 }
3313 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3314 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3315 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3316 }
3317 if (Chain) {
3318 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3319 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3320 }
3321 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3322}
3323
3324// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3325// an integer mask of type VT. If Chain is nonnull, we have a strict
3326// floating-point comparison. If in addition IsSignaling is true, we have
3327// a strict signaling floating-point comparison.
3328SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3329 const SDLoc &DL, EVT VT,
3330 ISD::CondCode CC,
3331 SDValue CmpOp0,
3332 SDValue CmpOp1,
3333 SDValue Chain,
3334 bool IsSignaling) const {
3335 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3336 assert (!Chain || IsFP);
3337 assert (!IsSignaling || Chain);
3338 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3339 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3340 bool Invert = false;
3341 SDValue Cmp;
3342 switch (CC) {
3343 // Handle tests for order using (or (ogt y x) (oge x y)).
3344 case ISD::SETUO:
3345 Invert = true;
3346 [[fallthrough]];
3347 case ISD::SETO: {
3348 assert(IsFP && "Unexpected integer comparison");
3349 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3350 DL, VT, CmpOp1, CmpOp0, Chain);
3351 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3352 DL, VT, CmpOp0, CmpOp1, Chain);
3353 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3354 if (Chain)
3355 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3356 LT.getValue(1), GE.getValue(1));
3357 break;
3358 }
3359
3360 // Handle <> tests using (or (ogt y x) (ogt x y)).
3361 case ISD::SETUEQ:
3362 Invert = true;
3363 [[fallthrough]];
3364 case ISD::SETONE: {
3365 assert(IsFP && "Unexpected integer comparison");
3366 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3367 DL, VT, CmpOp1, CmpOp0, Chain);
3368 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3369 DL, VT, CmpOp0, CmpOp1, Chain);
3370 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3371 if (Chain)
3372 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3373 LT.getValue(1), GT.getValue(1));
3374 break;
3375 }
3376
3377 // Otherwise a single comparison is enough. It doesn't really
3378 // matter whether we try the inversion or the swap first, since
3379 // there are no cases where both work.
3380 default:
3381 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3382 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3383 else {
3384 CC = ISD::getSetCCSwappedOperands(CC);
3385 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3386 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3387 else
3388 llvm_unreachable("Unhandled comparison");
3389 }
3390 if (Chain)
3391 Chain = Cmp.getValue(1);
3392 break;
3393 }
3394 if (Invert) {
3395 SDValue Mask =
3396 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3397 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3398 }
3399 if (Chain && Chain.getNode() != Cmp.getNode()) {
3400 SDValue Ops[2] = { Cmp, Chain };
3401 Cmp = DAG.getMergeValues(Ops, DL);
3402 }
3403 return Cmp;
3404}
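// Editorial note (not part of the original file): the unordered predicates
// reuse the ordered expansions above and invert afterwards.  For example,
// SETUO is handled as NOT(SETO), i.e. NOT((ogt y x) | (oge x y)), and SETUEQ
// as NOT(SETONE); the inversion is the XOR with the all-ones splat emitted
// just above.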
3405
3406SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3407 SelectionDAG &DAG) const {
3408 SDValue CmpOp0 = Op.getOperand(0);
3409 SDValue CmpOp1 = Op.getOperand(1);
3410 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3411 SDLoc DL(Op);
3412 EVT VT = Op.getValueType();
3413 if (VT.isVector())
3414 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3415
3416 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3417 SDValue CCReg = emitCmp(DAG, DL, C);
3418 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3419}
3420
3421SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3422 SelectionDAG &DAG,
3423 bool IsSignaling) const {
3424 SDValue Chain = Op.getOperand(0);
3425 SDValue CmpOp0 = Op.getOperand(1);
3426 SDValue CmpOp1 = Op.getOperand(2);
3427 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3428 SDLoc DL(Op);
3429 EVT VT = Op.getNode()->getValueType(0);
3430 if (VT.isVector()) {
3431 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3432 Chain, IsSignaling);
3433 return Res.getValue(Op.getResNo());
3434 }
3435
3436 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3437 SDValue CCReg = emitCmp(DAG, DL, C);
3438 CCReg->setFlags(Op->getFlags());
3439 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3440 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3441 return DAG.getMergeValues(Ops, DL);
3442}
3443
3444SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3445 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3446 SDValue CmpOp0 = Op.getOperand(2);
3447 SDValue CmpOp1 = Op.getOperand(3);
3448 SDValue Dest = Op.getOperand(4);
3449 SDLoc DL(Op);
3450
3451 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3452 SDValue CCReg = emitCmp(DAG, DL, C);
3453 return DAG.getNode(
3454 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3455 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3456 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3457}
3458
3459// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3460// allowing Pos and Neg to be wider than CmpOp.
3461static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3462 return (Neg.getOpcode() == ISD::SUB &&
3463 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3464 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3465 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3466 Pos.getOperand(0) == CmpOp)));
3467}
3468
3469// Return the absolute or negative absolute of Op; IsNegative decides which.
3470 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3471 bool IsNegative) {
3472 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3473 if (IsNegative)
3474 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3475 DAG.getConstant(0, DL, Op.getValueType()), Op);
3476 return Op;
3477}
3478
3479SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3480 SelectionDAG &DAG) const {
3481 SDValue CmpOp0 = Op.getOperand(0);
3482 SDValue CmpOp1 = Op.getOperand(1);
3483 SDValue TrueOp = Op.getOperand(2);
3484 SDValue FalseOp = Op.getOperand(3);
3485 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3486 SDLoc DL(Op);
3487
3488 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3489
3490 // Check for absolute and negative-absolute selections, including those
3491 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3492 // This check supplements the one in DAGCombiner.
3493 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3494 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3495 C.Op1.getOpcode() == ISD::Constant &&
3496 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3497 C.Op1->getAsZExtVal() == 0) {
3498 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3499 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3500 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3501 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3502 }
3503
3504 SDValue CCReg = emitCmp(DAG, DL, C);
3505 SDValue Ops[] = {TrueOp, FalseOp,
3506 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3507 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3508
3509 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3510}
3511
3512SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3513 SelectionDAG &DAG) const {
3514 SDLoc DL(Node);
3515 const GlobalValue *GV = Node->getGlobal();
3516 int64_t Offset = Node->getOffset();
3517 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3518 SDValue Result;
3519
3520 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3521 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3522 if (isInt<32>(Offset)) {
3523 // Assign anchors at 1<<12 byte boundaries.
3524 uint64_t Anchor = Offset & ~uint64_t(0xfff);
3525 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3526 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3527
3528 // The offset can be folded into the address if it is aligned to a
3529 // halfword.
3530 Offset -= Anchor;
3531 if (Offset != 0 && (Offset & 1) == 0) {
3532 SDValue Full =
3533 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3534 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3535 Offset = 0;
3536 }
3537 } else {
3538 // Conservatively load a constant offset greater than 32 bits into a
3539 // register below.
3540 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3541 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3542 }
3543 } else if (Subtarget.isTargetELF()) {
3544 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3545 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3546 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3547 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3548 } else if (Subtarget.isTargetzOS()) {
3549 Result = getADAEntry(DAG, GV, DL, PtrVT);
3550 } else
3551 llvm_unreachable("Unexpected Subtarget");
3552
3553 // If there was a non-zero offset that we didn't fold, create an explicit
3554 // addition for it.
3555 if (Offset != 0)
3556 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3557 DAG.getConstant(Offset, DL, PtrVT));
3558
3559 return Result;
3560}
3561
3562SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3563 SelectionDAG &DAG,
3564 unsigned Opcode,
3565 SDValue GOTOffset) const {
3566 SDLoc DL(Node);
3567 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3568 SDValue Chain = DAG.getEntryNode();
3569 SDValue Glue;
3570
3571 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3572 CallingConv::GHC)
3573 report_fatal_error("In GHC calling convention TLS is not supported");
3574
3575 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3576 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3577 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3578 Glue = Chain.getValue(1);
3579 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3580 Glue = Chain.getValue(1);
3581
3582 // The first call operand is the chain and the second is the TLS symbol.
3583 SmallVector<SDValue, 8> Ops;
3584 Ops.push_back(Chain);
3585 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3586 Node->getValueType(0),
3587 0, 0));
3588
3589 // Add argument registers to the end of the list so that they are
3590 // known live into the call.
3591 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3592 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3593
3594 // Add a register mask operand representing the call-preserved registers.
3595 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3596 const uint32_t *Mask =
3597 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3598 assert(Mask && "Missing call preserved mask for calling convention");
3599 Ops.push_back(DAG.getRegisterMask(Mask));
3600
3601 // Glue the call to the argument copies.
3602 Ops.push_back(Glue);
3603
3604 // Emit the call.
3605 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3606 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3607 Glue = Chain.getValue(1);
3608
3609 // Copy the return value from %r2.
3610 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3611}
3612
3613SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3614 SelectionDAG &DAG) const {
3615 SDValue Chain = DAG.getEntryNode();
3616 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3617
3618 // The high part of the thread pointer is in access register 0.
3619 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3620 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3621
3622 // The low part of the thread pointer is in access register 1.
3623 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3624 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3625
3626 // Merge them into a single 64-bit address.
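// For example, A0 = 0x00000001 and A1 = 0x23456788 yield the thread
// pointer 0x0000000123456788.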
3627 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3628 DAG.getConstant(32, DL, PtrVT));
3629 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3630}
3631
3632SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3633 SelectionDAG &DAG) const {
3634 if (DAG.getTarget().useEmulatedTLS())
3635 return LowerToTLSEmulatedModel(Node, DAG);
3636 SDLoc DL(Node);
3637 const GlobalValue *GV = Node->getGlobal();
3638 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3639 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3640
3641 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3642 CallingConv::GHC)
3643 report_fatal_error("In GHC calling convention TLS is not supported");
3644
3645 SDValue TP = lowerThreadPointer(DL, DAG);
3646
3647 // Get the offset of GA from the thread pointer, based on the TLS model.
3648 SDValue Offset;
3649 switch (model) {
3650 case TLSModel::GeneralDynamic: {
3651 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3652 SystemZConstantPoolValue *CPV =
3653 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3654
3655 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3656 Offset = DAG.getLoad(
3657 PtrVT, DL, DAG.getEntryNode(), Offset,
3658 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3659
3660 // Call __tls_get_offset to retrieve the offset.
3661 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3662 break;
3663 }
3664
3665 case TLSModel::LocalDynamic: {
3666 // Load the GOT offset of the module ID.
3667 SystemZConstantPoolValue *CPV =
3668 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3669
3670 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3671 Offset = DAG.getLoad(
3672 PtrVT, DL, DAG.getEntryNode(), Offset,
3673 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3674
3675 // Call __tls_get_offset to retrieve the module base offset.
3676 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3677
3678 // Note: The SystemZLDCleanupPass will remove redundant computations
3679 // of the module base offset. Count total number of local-dynamic
3680 // accesses to trigger execution of that pass.
3681 SystemZMachineFunctionInfo* MFI =
3682 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3683 MFI->incNumLocalDynamicTLSAccesses();
3684
3685 // Add the per-symbol offset.
3686 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3687
3688 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3689 DTPOffset = DAG.getLoad(
3690 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3691 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3692
3693 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3694 break;
3695 }
3696
3697 case TLSModel::InitialExec: {
3698 // Load the offset from the GOT.
3699 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3700 SystemZII::MO_INDNTPOFF);
3701 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3702 Offset =
3703 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3704 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3705 break;
3706 }
3707
3708 case TLSModel::LocalExec: {
3709 // Force the offset into the constant pool and load it from there.
3710 SystemZConstantPoolValue *CPV =
3711 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3712
3713 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3714 Offset = DAG.getLoad(
3715 PtrVT, DL, DAG.getEntryNode(), Offset,
3716 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3717 break;
3718 }
3719 }
3720
3721 // Add the base and offset together.
3722 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3723}
3724
3725SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3726 SelectionDAG &DAG) const {
3727 SDLoc DL(Node);
3728 const BlockAddress *BA = Node->getBlockAddress();
3729 int64_t Offset = Node->getOffset();
3730 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3731
3732 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3733 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3734 return Result;
3735}
3736
3737SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3738 SelectionDAG &DAG) const {
3739 SDLoc DL(JT);
3740 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3741 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3742
3743 // Use LARL to load the address of the table.
3744 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3745}
3746
3747SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3748 SelectionDAG &DAG) const {
3749 SDLoc DL(CP);
3750 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3751
3752 SDValue Result;
3753 if (CP->isMachineConstantPoolEntry())
3754 Result =
3755 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3756 else
3757 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3758 CP->getOffset());
3759
3760 // Use LARL to load the address of the constant pool entry.
3761 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3762}
3763
3764SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3765 SelectionDAG &DAG) const {
3766 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
3767 MachineFunction &MF = DAG.getMachineFunction();
3768 MachineFrameInfo &MFI = MF.getFrameInfo();
3769 MFI.setFrameAddressIsTaken(true);
3770
3771 SDLoc DL(Op);
3772 unsigned Depth = Op.getConstantOperandVal(0);
3773 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3774
3775 // By definition, the frame address is the address of the back chain. (In
3776 // the case of packed stack without backchain, return the address where the
3777 // backchain would have been stored. This will either be an unused space or
3778 // contain a saved register).
3779 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3780 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3781
3782 if (Depth > 0) {
3783 // FIXME The frontend should detect this case.
3784 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3785 report_fatal_error("Unsupported stack frame traversal count");
3786
3787 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
3788 while (Depth--) {
3789 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
3791 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
3792 }
3793 }
3794
3795 return BackChain;
3796}
3797
3798SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3799 SelectionDAG &DAG) const {
3800 MachineFunction &MF = DAG.getMachineFunction();
3801 MachineFrameInfo &MFI = MF.getFrameInfo();
3802 MFI.setReturnAddressIsTaken(true);
3803
3804 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3805 return SDValue();
3806
3807 SDLoc DL(Op);
3808 unsigned Depth = Op.getConstantOperandVal(0);
3809 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3810
3811 if (Depth > 0) {
3812 // FIXME The frontend should detect this case.
3813 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3814 report_fatal_error("Unsupported stack frame traversal count");
3815
3816 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3817 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
3818 int Offset = TFL->getReturnAddressOffset(MF);
3819 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
3820 DAG.getConstant(Offset, DL, PtrVT));
3821 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
3822 MachinePointerInfo());
3823 }
3824
3825 // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an
3826 // implicit live-in.
3827 Register LinkReg = MF.addLiveIn(
3828 Subtarget.getSpecialRegisters()->getReturnFunctionAddressRegister(),
3829 &SystemZ::GR64BitRegClass);
3830 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3831}
3832
3833SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3834 SelectionDAG &DAG) const {
3835 SDLoc DL(Op);
3836 SDValue In = Op.getOperand(0);
3837 EVT InVT = In.getValueType();
3838 EVT ResVT = Op.getValueType();
3839
3840 // Convert loads directly. This is normally done by DAGCombiner,
3841 // but we need this case for bitcasts that are created during lowering
3842 // and which are then lowered themselves.
3843 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3844 if (ISD::isNormalLoad(LoadN)) {
3845 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3846 LoadN->getBasePtr(), LoadN->getMemOperand());
3847 // Update the chain uses.
3848 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3849 return NewLoad;
3850 }
3851
3852 if (InVT == MVT::i32 && ResVT == MVT::f32) {
3853 SDValue In64;
3854 if (Subtarget.hasHighWord()) {
3855 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3856 MVT::i64);
3857 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3858 MVT::i64, SDValue(U64, 0), In);
3859 } else {
3860 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3861 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3862 DAG.getConstant(32, DL, MVT::i64));
3863 }
3864 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3865 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3866 DL, MVT::f32, Out64);
3867 }
3868 if (InVT == MVT::f32 && ResVT == MVT::i32) {
3869 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3870 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3871 MVT::f64, SDValue(U64, 0), In);
3872 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3873 if (Subtarget.hasHighWord())
3874 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3875 MVT::i32, Out64);
3876 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3877 DAG.getConstant(32, DL, MVT::i64));
3878 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3879 }
3880 llvm_unreachable("Unexpected bitcast combination");
3881}
3882
3883SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3884 SelectionDAG &DAG) const {
3885
3886 if (Subtarget.isTargetXPLINK64())
3887 return lowerVASTART_XPLINK(Op, DAG);
3888 else
3889 return lowerVASTART_ELF(Op, DAG);
3890}
3891
3892SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
3893 SelectionDAG &DAG) const {
3894 MachineFunction &MF = DAG.getMachineFunction();
3895 SystemZMachineFunctionInfo *FuncInfo =
3896 MF.getInfo<SystemZMachineFunctionInfo>();
3897
3898 SDLoc DL(Op);
3899
3900 // vastart just stores the address of the VarArgsFrameIndex slot into the
3901 // memory location argument.
3902 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3903 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3904 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3905 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
3906 MachinePointerInfo(SV));
3907}
3908
3909SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
3910 SelectionDAG &DAG) const {
3911 MachineFunction &MF = DAG.getMachineFunction();
3912 SystemZMachineFunctionInfo *FuncInfo =
3913 MF.getInfo<SystemZMachineFunctionInfo>();
3914 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3915
3916 SDValue Chain = Op.getOperand(0);
3917 SDValue Addr = Op.getOperand(1);
3918 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3919 SDLoc DL(Op);
3920
3921 // The initial values of each field.
3922 const unsigned NumFields = 4;
3923 SDValue Fields[NumFields] = {
3924 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3925 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3926 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3927 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3928 };
3929
3930 // Store each field into its respective slot.
3931 SDValue MemOps[NumFields];
3932 unsigned Offset = 0;
3933 for (unsigned I = 0; I < NumFields; ++I) {
3934 SDValue FieldAddr = Addr;
3935 if (Offset != 0)
3936 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3937 DAG.getIntPtrConstant(Offset, DL));
3938 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3939 MachinePointerInfo(SV, Offset));
3940 Offset += 8;
3941 }
3942 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3943}
3944
3945SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3946 SelectionDAG &DAG) const {
3947 SDValue Chain = Op.getOperand(0);
3948 SDValue DstPtr = Op.getOperand(1);
3949 SDValue SrcPtr = Op.getOperand(2);
3950 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3951 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3952 SDLoc DL(Op);
3953
3954 uint32_t Sz =
3955 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
3956 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
3957 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3958 /*isTailCall*/ false, MachinePointerInfo(DstSV),
3959 MachinePointerInfo(SrcSV));
3960}
3961
3962SDValue
3963SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
3964 SelectionDAG &DAG) const {
3965 if (Subtarget.isTargetXPLINK64())
3966 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
3967 else
3968 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
3969}
3970
3971SDValue
3972SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
3973 SelectionDAG &DAG) const {
3974 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3975 MachineFunction &MF = DAG.getMachineFunction();
3976 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3977 SDValue Chain = Op.getOperand(0);
3978 SDValue Size = Op.getOperand(1);
3979 SDValue Align = Op.getOperand(2);
3980 SDLoc DL(Op);
3981
3982 // If user has set the no alignment function attribute, ignore
3983 // alloca alignments.
3984 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
3985
3986 uint64_t StackAlign = TFI->getStackAlignment();
3987 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3988 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3989
3990 SDValue NeededSpace = Size;
3991
3992 // Add extra space for alignment if needed.
3993 EVT PtrVT = getPointerTy(MF.getDataLayout());
3994 if (ExtraAlignSpace)
3995 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
3996 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3997
3998 bool IsSigned = false;
3999 bool DoesNotReturn = false;
4000 bool IsReturnValueUsed = false;
4001 EVT VT = Op.getValueType();
4002 SDValue AllocaCall =
4003 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4004 CallingConv::C, IsSigned, DL, DoesNotReturn,
4005 IsReturnValueUsed)
4006 .first;
4007
4008 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4009 // to end of call in order to ensure it isn't broken up from the call
4010 // sequence.
4011 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4012 Register SPReg = Regs.getStackPointerRegister();
4013 Chain = AllocaCall.getValue(1);
4014 SDValue Glue = AllocaCall.getValue(2);
4015 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4016 Chain = NewSPRegNode.getValue(1);
4017
4018 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4019 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4020 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4021
4022 // Dynamically realign if needed.
4023 if (ExtraAlignSpace) {
4024 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4025 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4026 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4027 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4028 }
4029
4030 SDValue Ops[2] = {Result, Chain};
4031 return DAG.getMergeValues(Ops, DL);
4032}
4033
4034SDValue
4035SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4036 SelectionDAG &DAG) const {
4037 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4038 MachineFunction &MF = DAG.getMachineFunction();
4039 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4040 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4041
4042 SDValue Chain = Op.getOperand(0);
4043 SDValue Size = Op.getOperand(1);
4044 SDValue Align = Op.getOperand(2);
4045 SDLoc DL(Op);
4046
4047 // If user has set the no alignment function attribute, ignore
4048 // alloca alignments.
4049 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4050
4051 uint64_t StackAlign = TFI->getStackAlignment();
4052 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4053 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4054
4054
4055 Register SPReg = getStackPointerRegisterToSaveRestore();
4056 SDValue NeededSpace = Size;
4057
4058 // Get a reference to the stack pointer.
4059 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4060
4061 // If we need a backchain, save it now.
4062 SDValue Backchain;
4063 if (StoreBackchain)
4064 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4065 MachinePointerInfo());
4066
4067 // Add extra space for alignment if needed.
4068 if (ExtraAlignSpace)
4069 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4070 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4071
4072 // Get the new stack pointer value.
4073 SDValue NewSP;
4074 if (hasInlineStackProbe(MF)) {
4075 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4076 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4077 Chain = NewSP.getValue(1);
4078 }
4079 else {
4080 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4081 // Copy the new stack pointer back.
4082 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4083 }
4084
4085 // The allocated data lives above the 160 bytes allocated for the standard
4086 // frame, plus any outgoing stack arguments. We don't know how much that
4087 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4088 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4089 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4090
4091 // Dynamically realign if needed.
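// Realignment over-allocates ExtraAlignSpace bytes and then rounds the
// result up to the requested boundary. For example, with an 8-byte
// stack alignment and a 64-byte alloca alignment, ExtraAlignSpace is 56,
// so adding 56 and masking with ~63 yields a 64-byte aligned address
// that still lies within the allocated block.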
4092 if (RequiredAlign > StackAlign) {
4093 Result =
4094 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4095 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4096 Result =
4097 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4098 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4099 }
4100
4101 if (StoreBackchain)
4102 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4103 MachinePointerInfo());
4104
4105 SDValue Ops[2] = { Result, Chain };
4106 return DAG.getMergeValues(Ops, DL);
4107}
4108
4109SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4110 SDValue Op, SelectionDAG &DAG) const {
4111 SDLoc DL(Op);
4112
4113 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4114}
4115
4116SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4117 SelectionDAG &DAG) const {
4118 EVT VT = Op.getValueType();
4119 SDLoc DL(Op);
4120 SDValue Ops[2];
4121 if (is32Bit(VT))
4122 // Just do a normal 64-bit multiplication and extract the results.
4123 // We define this so that it can be used for constant division.
4124 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4125 Op.getOperand(1), Ops[1], Ops[0]);
4126 else if (Subtarget.hasMiscellaneousExtensions2())
4127 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4128 // the high result in the even register. ISD::SMUL_LOHI is defined to
4129 // return the low half first, so the results are in reverse order.
4130 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4131 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4132 else {
4133 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4134 //
4135 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4136 //
4137 // but using the fact that the upper halves are either all zeros
4138 // or all ones:
4139 //
4140 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4141 //
4142 // and grouping the right terms together since they are quicker than the
4143 // multiplication:
4144 //
4145 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
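//
// This works because lh and rh (LH and RH below) are computed as
// arithmetic shifts of ll and rl by 63, so each is either 0 or all
// ones. When lh is all ones, lh * rl == -rl == -(lh & rl); when lh is
// zero both terms are zero, and the same argument applies to rh.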
4146 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4147 SDValue LL = Op.getOperand(0);
4148 SDValue RL = Op.getOperand(1);
4149 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4150 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4151 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4152 // the high result in the even register. ISD::SMUL_LOHI is defined to
4153 // return the low half first, so the results are in reverse order.
4154 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4155 LL, RL, Ops[1], Ops[0]);
4156 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4157 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4158 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4159 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4160 }
4161 return DAG.getMergeValues(Ops, DL);
4162}
4163
4164SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4165 SelectionDAG &DAG) const {
4166 EVT VT = Op.getValueType();
4167 SDLoc DL(Op);
4168 SDValue Ops[2];
4169 if (is32Bit(VT))
4170 // Just do a normal 64-bit multiplication and extract the results.
4171 // We define this so that it can be used for constant division.
4172 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4173 Op.getOperand(1), Ops[1], Ops[0]);
4174 else
4175 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4176 // the high result in the even register. ISD::UMUL_LOHI is defined to
4177 // return the low half first, so the results are in reverse order.
4178 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4179 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4180 return DAG.getMergeValues(Ops, DL);
4181}
4182
4183SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4184 SelectionDAG &DAG) const {
4185 SDValue Op0 = Op.getOperand(0);
4186 SDValue Op1 = Op.getOperand(1);
4187 EVT VT = Op.getValueType();
4188 SDLoc DL(Op);
4189
4190 // We use DSGF for 32-bit division. This means the first operand must
4191 // always be 64-bit, and the second operand should be 32-bit whenever
4192 // that is possible, to improve performance.
4193 if (is32Bit(VT))
4194 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4195 else if (DAG.ComputeNumSignBits(Op1) > 32)
4196 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4197
4198 // DSG(F) returns the remainder in the even register and the
4199 // quotient in the odd register.
4200 SDValue Ops[2];
4201 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4202 return DAG.getMergeValues(Ops, DL);
4203}
4204
4205SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4206 SelectionDAG &DAG) const {
4207 EVT VT = Op.getValueType();
4208 SDLoc DL(Op);
4209
4210 // DL(G) returns the remainder in the even register and the
4211 // quotient in the odd register.
4212 SDValue Ops[2];
4213 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4214 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4215 return DAG.getMergeValues(Ops, DL);
4216}
4217
4218SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4219 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4220
4221 // Get the known-zero masks for each operand.
4222 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4223 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4224 DAG.computeKnownBits(Ops[1])};
4225
4226 // See if the upper 32 bits of one operand and the lower 32 bits of the
4227 // other are known zero. They are the low and high operands respectively.
4228 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4229 Known[1].Zero.getZExtValue() };
4230 unsigned High, Low;
4231 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4232 High = 1, Low = 0;
4233 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4234 High = 0, Low = 1;
4235 else
4236 return Op;
4237
4238 SDValue LowOp = Ops[Low];
4239 SDValue HighOp = Ops[High];
4240
4241 // If the high part is a constant, we're better off using IILH.
4242 if (HighOp.getOpcode() == ISD::Constant)
4243 return Op;
4244
4245 // If the low part is a constant that is outside the range of LHI,
4246 // then we're better off using IILF.
4247 if (LowOp.getOpcode() == ISD::Constant) {
4248 int64_t Value = int32_t(LowOp->getAsZExtVal());
4249 if (!isInt<16>(Value))
4250 return Op;
4251 }
4252
4253 // Check whether the high part is an AND that doesn't change the
4254 // high 32 bits and just masks out low bits. We can skip it if so.
4255 if (HighOp.getOpcode() == ISD::AND &&
4256 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4257 SDValue HighOp0 = HighOp.getOperand(0);
4258 uint64_t Mask = HighOp.getConstantOperandVal(1);
4259 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4260 HighOp = HighOp0;
4261 }
4262
4263 // Take advantage of the fact that all GR32 operations only change the
4264 // low 32 bits by truncating Low to an i32 and inserting it directly
4265 // using a subreg. The interesting cases are those where the truncation
4266 // can be folded.
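// For example, (or (and X, 0xffffffff00000000), (zext i32 Y)) becomes
// an insertion of Y into the low 32-bit subregister of X.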
4267 SDLoc DL(Op);
4268 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4269 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4270 MVT::i64, HighOp, Low32);
4271}
4272
4273// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4274SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4275 SelectionDAG &DAG) const {
4276 SDNode *N = Op.getNode();
4277 SDValue LHS = N->getOperand(0);
4278 SDValue RHS = N->getOperand(1);
4279 SDLoc DL(N);
4280
4281 if (N->getValueType(0) == MVT::i128) {
4282 unsigned BaseOp = 0;
4283 unsigned FlagOp = 0;
4284 bool IsBorrow = false;
4285 switch (Op.getOpcode()) {
4286 default: llvm_unreachable("Unknown instruction!");
4287 case ISD::UADDO:
4288 BaseOp = ISD::ADD;
4289 FlagOp = SystemZISD::VACC;
4290 break;
4291 case ISD::USUBO:
4292 BaseOp = ISD::SUB;
4293 FlagOp = SystemZISD::VSCBI;
4294 IsBorrow = true;
4295 break;
4296 }
4297 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4298 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4299 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4300 DAG.getValueType(MVT::i1));
4301 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4302 if (IsBorrow)
4303 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4304 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4305 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4306 }
4307
4308 unsigned BaseOp = 0;
4309 unsigned CCValid = 0;
4310 unsigned CCMask = 0;
4311
4312 switch (Op.getOpcode()) {
4313 default: llvm_unreachable("Unknown instruction!");
4314 case ISD::SADDO:
4315 BaseOp = SystemZISD::SADDO;
4316 CCValid = SystemZ::CCMASK_ARITH;
4317 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4318 break;
4319 case ISD::SSUBO:
4320 BaseOp = SystemZISD::SSUBO;
4321 CCValid = SystemZ::CCMASK_ARITH;
4322 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4323 break;
4324 case ISD::UADDO:
4325 BaseOp = SystemZISD::UADDO;
4326 CCValid = SystemZ::CCMASK_LOGICAL;
4327 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4328 break;
4329 case ISD::USUBO:
4330 BaseOp = SystemZISD::USUBO;
4331 CCValid = SystemZ::CCMASK_LOGICAL;
4332 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4333 break;
4334 }
4335
4336 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4337 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4338
4339 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4340 if (N->getValueType(1) == MVT::i1)
4341 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4342
4343 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4344}
4345
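// Return true if the carry (resp. borrow) chain rooted at Carry can be
// traced back through UADDO_CARRY (resp. USUBO_CARRY) nodes to a UADDO
// (resp. USUBO) node. lowerUADDSUBO_CARRY below only emits
// SystemZISD::ADDCARRY/SUBCARRY, which read the carry from CC, when this
// holds; otherwise it returns SDValue() and the generic expansion is used.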
4346static bool isAddCarryChain(SDValue Carry) {
4347 while (Carry.getOpcode() == ISD::UADDO_CARRY)
4348 Carry = Carry.getOperand(2);
4349 return Carry.getOpcode() == ISD::UADDO;
4350}
4351
4352static bool isSubBorrowChain(SDValue Carry) {
4353 while (Carry.getOpcode() == ISD::USUBO_CARRY)
4354 Carry = Carry.getOperand(2);
4355 return Carry.getOpcode() == ISD::USUBO;
4356}
4357
4358// Lower UADDO_CARRY/USUBO_CARRY nodes.
4359SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4360 SelectionDAG &DAG) const {
4361
4362 SDNode *N = Op.getNode();
4363 MVT VT = N->getSimpleValueType(0);
4364
4365 // Let legalize expand this if it isn't a legal type yet.
4366 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4367 return SDValue();
4368
4369 SDValue LHS = N->getOperand(0);
4370 SDValue RHS = N->getOperand(1);
4371 SDValue Carry = Op.getOperand(2);
4372 SDLoc DL(N);
4373
4374 if (VT == MVT::i128) {
4375 unsigned BaseOp = 0;
4376 unsigned FlagOp = 0;
4377 bool IsBorrow = false;
4378 switch (Op.getOpcode()) {
4379 default: llvm_unreachable("Unknown instruction!");
4380 case ISD::UADDO_CARRY:
4381 BaseOp = SystemZISD::VAC;
4382 FlagOp = SystemZISD::VACCC;
4383 break;
4384 case ISD::USUBO_CARRY:
4385 BaseOp = SystemZISD::VSBI;
4386 FlagOp = SystemZISD::VSBCBI;
4387 IsBorrow = true;
4388 break;
4389 }
4390 if (IsBorrow)
4391 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4392 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4393 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4394 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4395 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4396 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4397 DAG.getValueType(MVT::i1));
4398 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4399 if (IsBorrow)
4400 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4401 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4402 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4403 }
4404
4405 unsigned BaseOp = 0;
4406 unsigned CCValid = 0;
4407 unsigned CCMask = 0;
4408
4409 switch (Op.getOpcode()) {
4410 default: llvm_unreachable("Unknown instruction!");
4411 case ISD::UADDO_CARRY:
4412 if (!isAddCarryChain(Carry))
4413 return SDValue();
4414
4415 BaseOp = SystemZISD::ADDCARRY;
4416 CCValid = SystemZ::CCMASK_LOGICAL;
4417 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4418 break;
4419 case ISD::USUBO_CARRY:
4420 if (!isSubBorrowChain(Carry))
4421 return SDValue();
4422
4423 BaseOp = SystemZISD::SUBCARRY;
4424 CCValid = SystemZ::CCMASK_LOGICAL;
4425 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4426 break;
4427 }
4428
4429 // Set the condition code from the carry flag.
4430 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4431 DAG.getConstant(CCValid, DL, MVT::i32),
4432 DAG.getConstant(CCMask, DL, MVT::i32));
4433
4434 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4435 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4436
4437 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4438 if (N->getValueType(1) == MVT::i1)
4439 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4440
4441 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4442}
4443
4444SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4445 SelectionDAG &DAG) const {
4446 EVT VT = Op.getValueType();
4447 SDLoc DL(Op);
4448 Op = Op.getOperand(0);
4449
4450 if (VT.getScalarSizeInBits() == 128) {
4451 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4452 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4453 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4454 DAG.getConstant(0, DL, MVT::i64));
4455 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4456 return Op;
4457 }
4458
4459 // Handle vector types via VPOPCT.
4460 if (VT.isVector()) {
4461 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4462 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4463 switch (VT.getScalarSizeInBits()) {
4464 case 8:
4465 break;
4466 case 16: {
4467 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4468 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4469 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4470 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4471 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4472 break;
4473 }
4474 case 32: {
4475 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4476 DAG.getConstant(0, DL, MVT::i32));
4477 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4478 break;
4479 }
4480 case 64: {
4481 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4482 DAG.getConstant(0, DL, MVT::i32));
4483 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4484 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4485 break;
4486 }
4487 default:
4488 llvm_unreachable("Unexpected type");
4489 }
4490 return Op;
4491 }
4492
4493 // Get the known-zero mask for the operand.
4494 KnownBits Known = DAG.computeKnownBits(Op);
4495 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
4496 if (NumSignificantBits == 0)
4497 return DAG.getConstant(0, DL, VT);
4498
4499 // Skip known-zero high parts of the operand.
4500 int64_t OrigBitSize = VT.getSizeInBits();
4501 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
4502 BitSize = std::min(BitSize, OrigBitSize);
4503
4504 // The POPCNT instruction counts the number of bits in each byte.
4505 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
4506 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
4507 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4508
4509 // Add up per-byte counts in a binary tree. All bits of Op at
4510 // position larger than BitSize remain zero throughout.
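// For example, with BitSize == 32 and per-byte counts [a,b,c,d] (a in
// the most significant byte), adding the value shifted left by 16 gives
// [a+c, b+d, c, d]; adding a further shift by 8 gives
// [a+b+c+d, b+c+d, c+d, d]. The total is now in the most significant
// byte and is extracted by the final shift right by BitSize - 8 == 24.
// Each byte count is at most 8 and the running sums never exceed 64, so
// no carries occur between bytes.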
4511 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
4512 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
4513 if (BitSize != OrigBitSize)
4514 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
4515 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
4516 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4517 }
4518
4519 // Extract overall result from high byte.
4520 if (BitSize > 8)
4521 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4522 DAG.getConstant(BitSize - 8, DL, VT));
4523
4524 return Op;
4525}
4526
4527SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4528 SelectionDAG &DAG) const {
4529 SDLoc DL(Op);
4530 AtomicOrdering FenceOrdering =
4531 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4532 SyncScope::ID FenceSSID =
4533 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4534
4535 // The only fence that needs an instruction is a sequentially-consistent
4536 // cross-thread fence.
4537 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4538 FenceSSID == SyncScope::System) {
4539 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
4540 Op.getOperand(0)),
4541 0);
4542 }
4543
4544 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4545 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4546}
4547
4548SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
4549 SelectionDAG &DAG) const {
4550 auto *Node = cast<AtomicSDNode>(Op.getNode());
4551 assert(
4552 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
4553 "Only custom lowering i128 or f128.");
4554 // Use same code to handle both legal and non-legal i128 types.
4555 SmallVector<SDValue, 2> Results;
4556 LowerOperationWrapper(Node, Results, DAG);
4557 return DAG.getMergeValues(Results, SDLoc(Op));
4558}
4559
4560// Prepare for a Compare And Swap for a subword operation. This needs to be
4561// done in memory with 4 bytes at natural alignment.
4562 static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
4563 SDValue &AlignedAddr, SDValue &BitShift,
4564 SDValue &NegBitShift) {
4565 EVT PtrVT = Addr.getValueType();
4566 EVT WideVT = MVT::i32;
4567
4568 // Get the address of the containing word.
4569 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4570 DAG.getConstant(-4, DL, PtrVT));
4571
4572 // Get the number of bits that the word must be rotated left in order
4573 // to bring the field to the top bits of a GR32.
4574 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4575 DAG.getConstant(3, DL, PtrVT));
4576 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4577
4578 // Get the complementing shift amount, for rotating a field in the top
4579 // bits back to its proper position.
4580 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4581 DAG.getConstant(0, DL, WideVT), BitShift);
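// For example, a halfword at address 0x1006 lives in the word at
// AlignedAddr 0x1004; (0x1006 * 8) % 32 == 16, so rotating that word
// left by 16 bits brings the halfword into the top 16 bits of the GR32,
// and rotating by NegBitShift (0 - BitShift, i.e. 16 mod 32 here) moves
// it back.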
4582
4583}
4584
4585// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
4586// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
4587SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
4588 SelectionDAG &DAG,
4589 unsigned Opcode) const {
4590 auto *Node = cast<AtomicSDNode>(Op.getNode());
4591
4592 // 32-bit operations need no special handling.
4593 EVT NarrowVT = Node->getMemoryVT();
4594 EVT WideVT = MVT::i32;
4595 if (NarrowVT == WideVT)
4596 return Op;
4597
4598 int64_t BitSize = NarrowVT.getSizeInBits();
4599 SDValue ChainIn = Node->getChain();
4600 SDValue Addr = Node->getBasePtr();
4601 SDValue Src2 = Node->getVal();
4602 MachineMemOperand *MMO = Node->getMemOperand();
4603 SDLoc DL(Node);
4604
4605 // Convert atomic subtracts of constants into additions.
4606 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4607 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4608 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
4609 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
4610 }
4611
4612 SDValue AlignedAddr, BitShift, NegBitShift;
4613 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4614
4615 // Extend the source operand to 32 bits and prepare it for the inner loop.
4616 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
4617 // operations require the source to be shifted in advance. (This shift
4618 // can be folded if the source is constant.) For AND and NAND, the lower
4619 // bits must be set, while for other opcodes they should be left clear.
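// For example, an 8-bit AND (BitSize == 8) shifts the operand into the
// top byte and ORs in 0x00ffffff, so the AND performed inside the CS
// loop leaves the other three bytes of the containing word unchanged.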
4620 if (Opcode != SystemZISD::ATOMIC_SWAPW)
4621 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
4622 DAG.getConstant(32 - BitSize, DL, WideVT));
4623 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
4624 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
4625 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
4626 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
4627
4628 // Construct the ATOMIC_LOADW_* node.
4629 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
4630 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4631 DAG.getConstant(BitSize, DL, WideVT) };
4632 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4633 NarrowVT, MMO);
4634
4635 // Rotate the result of the final CS so that the field is in the lower
4636 // bits of a GR32, then truncate it.
4637 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
4638 DAG.getConstant(BitSize, DL, WideVT));
4639 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
4640
4641 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
4642 return DAG.getMergeValues(RetOps, DL);
4643}
4644
4645// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
4646// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
4647SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
4648 SelectionDAG &DAG) const {
4649 auto *Node = cast<AtomicSDNode>(Op.getNode());
4650 EVT MemVT = Node->getMemoryVT();
4651 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
4652 // A full-width operation: negate and use LAA(G).
4653 assert(Op.getValueType() == MemVT && "Mismatched VTs");
4654 assert(Subtarget.hasInterlockedAccess1() &&
4655 "Should have been expanded by AtomicExpand pass.");
4656 SDValue Src2 = Node->getVal();
4657 SDLoc DL(Src2);
4658 SDValue NegSrc2 =
4659 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
4660 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
4661 Node->getChain(), Node->getBasePtr(), NegSrc2,
4662 Node->getMemOperand());
4663 }
4664
4665 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
4666}
4667
4668// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
4669SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
4670 SelectionDAG &DAG) const {
4671 auto *Node = cast<AtomicSDNode>(Op.getNode());
4672 SDValue ChainIn = Node->getOperand(0);
4673 SDValue Addr = Node->getOperand(1);
4674 SDValue CmpVal = Node->getOperand(2);
4675 SDValue SwapVal = Node->getOperand(3);
4676 MachineMemOperand *MMO = Node->getMemOperand();
4677 SDLoc DL(Node);
4678
4679 if (Node->getMemoryVT() == MVT::i128) {
4680 // Use same code to handle both legal and non-legal i128 types.
4681 SmallVector<SDValue, 3> Results;
4682 LowerOperationWrapper(Node, Results, DAG);
4683 return DAG.getMergeValues(Results, DL);
4684 }
4685
4686 // We have native support for 32-bit and 64-bit compare and swap, but we
4687 // still need to expand extracting the "success" result from the CC.
4688 EVT NarrowVT = Node->getMemoryVT();
4689 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4690 if (NarrowVT == WideVT) {
4691 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4692 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
4693 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
4694 DL, Tys, Ops, NarrowVT, MMO);
4695 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4696 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
4697
4698 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4699 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4700 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4701 return SDValue();
4702 }
4703
4704 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4705 // via a fullword ATOMIC_CMP_SWAPW operation.
4706 int64_t BitSize = NarrowVT.getSizeInBits();
4707
4708 SDValue AlignedAddr, BitShift, NegBitShift;
4709 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4710
4711 // Construct the ATOMIC_CMP_SWAPW node.
4712 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4713 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
4714 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
4715 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
4716 VTList, Ops, NarrowVT, MMO);
4717 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4718 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
4719
4720 // emitAtomicCmpSwapW() will zero extend the result (original value).
4721 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
4722 DAG.getValueType(NarrowVT));
4723 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
4724 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4725 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4726 return SDValue();
4727}
4728
4729 MachineMemOperand::Flags
4730 SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
4731 // Because of how we convert atomic_load and atomic_store to normal loads and
4732 // stores in the DAG, we need to ensure that the MMOs are marked volatile
4733 // since DAGCombine hasn't been updated to account for atomic, but
4734 // non-volatile, loads. (See D57601)
4735 if (auto *SI = dyn_cast<StoreInst>(&I))
4736 if (SI->isAtomic())
4737 return MachineMemOperand::MOVolatile;
4738 if (auto *LI = dyn_cast<LoadInst>(&I))
4739 if (LI->isAtomic())
4740 return MachineMemOperand::MOVolatile;
4741 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
4742 if (AI->isAtomic())
4743 return MachineMemOperand::MOVolatile;
4744 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
4745 if (AI->isAtomic())
4746 return MachineMemOperand::MOVolatile;
4747 return MachineMemOperand::MONone;
4748}
4749
4750SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
4751 SelectionDAG &DAG) const {
4752 MachineFunction &MF = DAG.getMachineFunction();
4753 auto *Regs = Subtarget.getSpecialRegisters();
4754 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4755 report_fatal_error("Variable-sized stack allocations are not supported "
4756 "in GHC calling convention");
4757 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
4758 Regs->getStackPointerRegister(), Op.getValueType());
4759}
4760
4761SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
4762 SelectionDAG &DAG) const {
4763 MachineFunction &MF = DAG.getMachineFunction();
4764 auto *Regs = Subtarget.getSpecialRegisters();
4765 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4766
4767 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4768 report_fatal_error("Variable-sized stack allocations are not supported "
4769 "in GHC calling convention");
4770
4771 SDValue Chain = Op.getOperand(0);
4772 SDValue NewSP = Op.getOperand(1);
4773 SDValue Backchain;
4774 SDLoc DL(Op);
4775
4776 if (StoreBackchain) {
4777 SDValue OldSP = DAG.getCopyFromReg(
4778 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
4779 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4780 MachinePointerInfo());
4781 }
4782
4783 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
4784
4785 if (StoreBackchain)
4786 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4787 MachinePointerInfo());
4788
4789 return Chain;
4790}
4791
4792SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
4793 SelectionDAG &DAG) const {
4794 bool IsData = Op.getConstantOperandVal(4);
4795 if (!IsData)
4796 // Just preserve the chain.
4797 return Op.getOperand(0);
4798
4799 SDLoc DL(Op);
4800 bool IsWrite = Op.getConstantOperandVal(2);
4801 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
4802 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
4803 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
4804 Op.getOperand(1)};
4805 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
4806 Node->getVTList(), Ops,
4807 Node->getMemoryVT(), Node->getMemOperand());
4808}
4809
4810// Convert condition code in CCReg to an i32 value.
4811 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
4812 SDLoc DL(CCReg);
4813 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
4814 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
4815 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
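// IPM inserts the 2-bit condition code into bits 29:28 of the result
// (SystemZ::IPM_CC == 28), so shifting right by IPM_CC leaves the raw
// CC value 0..3 in the low bits.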
4816}
4817
4818SDValue
4819SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4820 SelectionDAG &DAG) const {
4821 unsigned Opcode, CCValid;
4822 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
4823 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
4824 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
4825 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
4826 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
4827 return SDValue();
4828 }
4829
4830 return SDValue();
4831}
4832
4833SDValue
4834SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4835 SelectionDAG &DAG) const {
4836 unsigned Opcode, CCValid;
4837 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
4838 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
4839 if (Op->getNumValues() == 1)
4840 return getCCResult(DAG, SDValue(Node, 0));
4841 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
4842 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
4843 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
4844 }
4845
4846 unsigned Id = Op.getConstantOperandVal(0);
4847 switch (Id) {
4848 case Intrinsic::thread_pointer:
4849 return lowerThreadPointer(SDLoc(Op), DAG);
4850
4851 case Intrinsic::s390_vpdi:
4852 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
4853 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4854
4855 case Intrinsic::s390_vperm:
4856 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
4857 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4858
4859 case Intrinsic::s390_vuphb:
4860 case Intrinsic::s390_vuphh:
4861 case Intrinsic::s390_vuphf:
4862 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
4863 Op.getOperand(1));
4864
4865 case Intrinsic::s390_vuplhb:
4866 case Intrinsic::s390_vuplhh:
4867 case Intrinsic::s390_vuplhf:
4868 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
4869 Op.getOperand(1));
4870
4871 case Intrinsic::s390_vuplb:
4872 case Intrinsic::s390_vuplhw:
4873 case Intrinsic::s390_vuplf:
4874 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
4875 Op.getOperand(1));
4876
4877 case Intrinsic::s390_vupllb:
4878 case Intrinsic::s390_vupllh:
4879 case Intrinsic::s390_vupllf:
4880 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
4881 Op.getOperand(1));
4882
4883 case Intrinsic::s390_vsumb:
4884 case Intrinsic::s390_vsumh:
4885 case Intrinsic::s390_vsumgh:
4886 case Intrinsic::s390_vsumgf:
4887 case Intrinsic::s390_vsumqf:
4888 case Intrinsic::s390_vsumqg:
4889 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
4890 Op.getOperand(1), Op.getOperand(2));
4891
4892 case Intrinsic::s390_vaq:
4893 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
4894 Op.getOperand(1), Op.getOperand(2));
4895 case Intrinsic::s390_vaccb:
4896 case Intrinsic::s390_vacch:
4897 case Intrinsic::s390_vaccf:
4898 case Intrinsic::s390_vaccg:
4899 case Intrinsic::s390_vaccq:
4900 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
4901 Op.getOperand(1), Op.getOperand(2));
4902 case Intrinsic::s390_vacq:
4903 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
4904 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4905 case Intrinsic::s390_vacccq:
4906 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
4907 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4908
4909 case Intrinsic::s390_vsq:
4910 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
4911 Op.getOperand(1), Op.getOperand(2));
4912 case Intrinsic::s390_vscbib:
4913 case Intrinsic::s390_vscbih:
4914 case Intrinsic::s390_vscbif:
4915 case Intrinsic::s390_vscbig:
4916 case Intrinsic::s390_vscbiq:
4917 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
4918 Op.getOperand(1), Op.getOperand(2));
4919 case Intrinsic::s390_vsbiq:
4920 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
4921 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4922 case Intrinsic::s390_vsbcbiq:
4923 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
4924 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4925 }
4926
4927 return SDValue();
4928}
4929
4930namespace {
4931// Says that SystemZISD operation Opcode can be used to perform the equivalent
4932// of a VPERM with permute vector Bytes. If Opcode takes three operands,
4933// Operand is the constant third operand, otherwise it is the number of
4934// bytes in each element of the result.
4935struct Permute {
4936 unsigned Opcode;
4937 unsigned Operand;
4938 unsigned char Bytes[SystemZ::VectorBytes];
4939};
4940}
4941
4942static const Permute PermuteForms[] = {
4943 // VMRHG
4944 { SystemZISD::MERGE_HIGH, 8,
4945 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4946 // VMRHF
4947 { SystemZISD::MERGE_HIGH, 4,
4948 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4949 // VMRHH
4950 { SystemZISD::MERGE_HIGH, 2,
4951 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4952 // VMRHB
4953 { SystemZISD::MERGE_HIGH, 1,
4954 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4955 // VMRLG
4956 { SystemZISD::MERGE_LOW, 8,
4957 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4958 // VMRLF
4959 { SystemZISD::MERGE_LOW, 4,
4960 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4961 // VMRLH
4962 { SystemZISD::MERGE_LOW, 2,
4963 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4964 // VMRLB
4965 { SystemZISD::MERGE_LOW, 1,
4966 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4967 // VPKG
4968 { SystemZISD::PACK, 4,
4969 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4970 // VPKF
4971 { SystemZISD::PACK, 2,
4972 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4973 // VPKH
4974 { SystemZISD::PACK, 1,
4975 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4976 // VPDI V1, V2, 4 (low half of V1, high half of V2)
4977 { SystemZISD::PERMUTE_DWORDS, 4,
4978 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4979 // VPDI V1, V2, 1 (high half of V1, low half of V2)
4980 { SystemZISD::PERMUTE_DWORDS, 1,
4981 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4982};
4983
4984// Called after matching a vector shuffle against a particular pattern.
4985// Both the original shuffle and the pattern have two vector operands.
4986// OpNos[0] is the operand of the original shuffle that should be used for
4987// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4988// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
4989// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4990// for operands 0 and 1 of the pattern.
4991static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
4992 if (OpNos[0] < 0) {
4993 if (OpNos[1] < 0)
4994 return false;
4995 OpNo0 = OpNo1 = OpNos[1];
4996 } else if (OpNos[1] < 0) {
4997 OpNo0 = OpNo1 = OpNos[0];
4998 } else {
4999 OpNo0 = OpNos[0];
5000 OpNo1 = OpNos[1];
5001 }
5002 return true;
5003}
5004
5005// Bytes is a VPERM-like permute vector, except that -1 is used for
5006// undefined bytes. Return true if the VPERM can be implemented using P.
5007// When returning true set OpNo0 to the VPERM operand that should be
5008// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5009//
5010// For example, if swapping the VPERM operands allows P to match, OpNo0
5011// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5012// operand, but rewriting it to use two duplicated operands allows it to
5013// match P, then OpNo0 and OpNo1 will be the same.
5014static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5015 unsigned &OpNo0, unsigned &OpNo1) {
5016 int OpNos[] = { -1, -1 };
5017 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5018 int Elt = Bytes[I];
5019 if (Elt >= 0) {
5020 // Make sure that the two permute vectors use the same suboperand
5021 // byte number. Only the operand numbers (the high bits) are
5022 // allowed to differ.
5023 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5024 return false;
5025 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5026 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5027 // Make sure that the operand mappings are consistent with previous
5028 // elements.
5029 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5030 return false;
5031 OpNos[ModelOpNo] = RealOpNo;
5032 }
5033 }
5034 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5035}
5036
5037// As above, but search for a matching permute.
5038static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5039 unsigned &OpNo0, unsigned &OpNo1) {
5040 for (auto &P : PermuteForms)
5041 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5042 return &P;
5043 return nullptr;
5044}
5045
5046// Bytes is a VPERM-like permute vector, except that -1 is used for
5047// undefined bytes. This permute is an operand of an outer permute.
5048// See whether redistributing the -1 bytes gives a shuffle that can be
5049// implemented using P. If so, set Transform to a VPERM-like permute vector
5050// that, when applied to the result of P, gives the original permute in Bytes.
5051 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5052 const Permute &P,
5053 SmallVectorImpl<int> &Transform) {
5054 unsigned To = 0;
5055 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5056 int Elt = Bytes[From];
5057 if (Elt < 0)
5058 // Byte number From of the result is undefined.
5059 Transform[From] = -1;
5060 else {
5061 while (P.Bytes[To] != Elt) {
5062 To += 1;
5063 if (To == SystemZ::VectorBytes)
5064 return false;
5065 }
5066 Transform[From] = To;
5067 }
5068 }
5069 return true;
5070}
5071
5072// As above, but search for a matching permute.
5073static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5074 SmallVectorImpl<int> &Transform) {
5075 for (auto &P : PermuteForms)
5076 if (matchDoublePermute(Bytes, P, Transform))
5077 return &P;
5078 return nullptr;
5079}
5080
5081// Convert the mask of the given shuffle op into a byte-level mask,
5082// as if it had type vNi8.
5083static bool getVPermMask(SDValue ShuffleOp,
5084 SmallVectorImpl<int> &Bytes) {
5085 EVT VT = ShuffleOp.getValueType();
5086 unsigned NumElements = VT.getVectorNumElements();
5087 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5088
5089 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5090 Bytes.resize(NumElements * BytesPerElement, -1);
5091 for (unsigned I = 0; I < NumElements; ++I) {
5092 int Index = VSN->getMaskElt(I);
5093 if (Index >= 0)
5094 for (unsigned J = 0; J < BytesPerElement; ++J)
5095 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5096 }
5097 return true;
5098 }
5099 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5100 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5101 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5102 Bytes.resize(NumElements * BytesPerElement, -1);
5103 for (unsigned I = 0; I < NumElements; ++I)
5104 for (unsigned J = 0; J < BytesPerElement; ++J)
5105 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5106 return true;
5107 }
5108 return false;
5109}
5110
5111// Bytes is a VPERM-like permute vector, except that -1 is used for
5112// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5113// the result come from a contiguous sequence of bytes from one input.
5114// Set Base to the selector for the first byte if so.
5115static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5116 unsigned BytesPerElement, int &Base) {
5117 Base = -1;
5118 for (unsigned I = 0; I < BytesPerElement; ++I) {
5119 if (Bytes[Start + I] >= 0) {
5120 unsigned Elem = Bytes[Start + I];
5121 if (Base < 0) {
5122 Base = Elem - I;
5123 // Make sure the bytes would come from one input operand.
5124 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5125 return false;
5126 } else if (unsigned(Base) != Elem - I)
5127 return false;
5128 }
5129 }
5130 return true;
5131}
5132
5133// Bytes is a VPERM-like permute vector, except that -1 is used for
5134// undefined bytes. Return true if it can be performed using VSLDB.
5135// When returning true, set StartIndex to the shift amount and OpNo0
5136// and OpNo1 to the VPERM operands that should be used as the first
5137// and second shift operand respectively.
5138 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5139 unsigned &StartIndex, unsigned &OpNo0,
5140 unsigned &OpNo1) {
5141 int OpNos[] = { -1, -1 };
5142 int Shift = -1;
5143 for (unsigned I = 0; I < 16; ++I) {
5144 int Index = Bytes[I];
5145 if (Index >= 0) {
5146 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5147 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5148 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5149 if (Shift < 0)
5150 Shift = ExpectedShift;
5151 else if (Shift != ExpectedShift)
5152 return false;
5153 // Make sure that the operand mappings are consistent with previous
5154 // elements.
5155 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5156 return false;
5157 OpNos[ModelOpNo] = RealOpNo;
5158 }
5159 }
5160 StartIndex = Shift;
5161 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5162}
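
// Worked example: Bytes[I] == I + 5 for all I (bytes 5..15 of the first
// operand followed by bytes 0..4 of the second) is a double-vector shift left
// by 5 bytes, so this returns true with StartIndex == 5, OpNo0 == 0 and
// OpNo1 == 1.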
5163
5164// Create a node that performs P on operands Op0 and Op1, casting the
5165// operands to the appropriate type. The type of the result is determined by P.
5166static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5167 const Permute &P, SDValue Op0, SDValue Op1) {
5168 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5169 // elements of a PACK are twice as wide as the outputs.
5170 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5171 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5172 P.Operand);
5173 // Cast both operands to the appropriate type.
5174 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5175 SystemZ::VectorBytes / InBytes);
5176 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5177 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5178 SDValue Op;
5179 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5180 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5181 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5182 } else if (P.Opcode == SystemZISD::PACK) {
5183 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5184 SystemZ::VectorBytes / P.Operand);
5185 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5186 } else {
5187 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5188 }
5189 return Op;
5190}
5191
5192static bool isZeroVector(SDValue N) {
5193 if (N->getOpcode() == ISD::BITCAST)
5194 N = N->getOperand(0);
5195 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5196 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5197 return Op->getZExtValue() == 0;
5198 return ISD::isBuildVectorAllZeros(N.getNode());
5199}
5200
5201// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5202static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5203 for (unsigned I = 0; I < Num ; I++)
5204 if (isZeroVector(Ops[I]))
5205 return I;
5206 return UINT32_MAX;
5207}
5208
5209// Bytes is a VPERM-like permute vector, except that -1 is used for
5210// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5211// VSLDB or VPERM.
5212static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5213 SDValue *Ops,
5214 const SmallVectorImpl<int> &Bytes) {
5215 for (unsigned I = 0; I < 2; ++I)
5216 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5217
5218 // First see whether VSLDB can be used.
5219 unsigned StartIndex, OpNo0, OpNo1;
5220 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5221 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5222 Ops[OpNo1],
5223 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5224
5225 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5226 // eliminate a zero vector by reusing any zero index in the permute vector.
5227 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5228 if (ZeroVecIdx != UINT32_MAX) {
5229 bool MaskFirst = true;
5230 int ZeroIdx = -1;
5231 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5232 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5233 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5234 if (OpNo == ZeroVecIdx && I == 0) {
5235 // If the first byte is zero, use mask as first operand.
5236 ZeroIdx = 0;
5237 break;
5238 }
5239 if (OpNo != ZeroVecIdx && Byte == 0) {
5240 // If mask contains a zero, use it by placing that vector first.
5241 ZeroIdx = I + SystemZ::VectorBytes;
5242 MaskFirst = false;
5243 break;
5244 }
5245 }
5246 if (ZeroIdx != -1) {
5247 SDValue IndexNodes[SystemZ::VectorBytes];
5248 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5249 if (Bytes[I] >= 0) {
5250 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5251 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5252 if (OpNo == ZeroVecIdx)
5253 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5254 else {
5255 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5256 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5257 }
5258 } else
5259 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5260 }
5261 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5262 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5263 if (MaskFirst)
5264 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5265 Mask);
5266 else
5267 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5268 Mask);
5269 }
5270 }
5271
5272 SDValue IndexNodes[SystemZ::VectorBytes];
5273 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5274 if (Bytes[I] >= 0)
5275 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5276 else
5277 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5278 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5279 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5280 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5281}
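
// Note on the zero-vector case above: VPERM takes two source vectors plus a
// mask, so when one source is known to be all zeros the mask register itself
// is reused as that source. Every result byte that should be zero gets a
// selector (ZeroIdx) pointing at a mask byte whose value is 0, which avoids
// materializing a separate zero vector.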
5282
5283namespace {
5284// Describes a general N-operand vector shuffle.
5285struct GeneralShuffle {
5286 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
5287 void addUndef();
5288 bool add(SDValue, unsigned);
5289 SDValue getNode(SelectionDAG &, const SDLoc &);
5290 void tryPrepareForUnpack();
5291 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5292 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5293
5294 // The operands of the shuffle.
5295 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5296
5297 // Index I is -1 if byte I of the result is undefined. Otherwise the
5298 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5299 // Bytes[I] / SystemZ::VectorBytes.
5300 SmallVector<int, SystemZ::VectorBytes> Bytes;
5301
5302 // The type of the shuffle result.
5303 EVT VT;
5304
5305 // Holds 1, 2 or 4 if a final unpack has been prepared, else a larger value.
5306 unsigned UnpackFromEltSize;
5307};
5308}
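
// Example of the Bytes encoding used by GeneralShuffle: with 16-byte
// operands, Bytes[3] == 21 means byte 3 of the result is byte 21 % 16 == 5
// of operand 21 / 16 == 1, while Bytes[3] == -1 leaves that result byte
// undefined.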
5309
5310// Add an extra undefined element to the shuffle.
5311void GeneralShuffle::addUndef() {
5312 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5313 for (unsigned I = 0; I < BytesPerElement; ++I)
5314 Bytes.push_back(-1);
5315}
5316
5317// Add an extra element to the shuffle, taking it from element Elem of Op.
5318// A null Op indicates a vector input whose value will be calculated later;
5319// there is at most one such input per shuffle and it always has the same
5320// type as the result. Aborts and returns false if the source vector elements
5321// of an EXTRACT_VECTOR_ELT are smaller than the destination elements (in
5322// LLVM IR they are implicitly extended); this case is rare and not optimized.
5323bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5324 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5325
5326 // The source vector can have wider elements than the result,
5327 // either through an explicit TRUNCATE or because of type legalization.
5328 // We want the least significant part.
5329 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5330 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5331
5332 // Return false if the source elements are smaller than their destination
5333 // elements.
5334 if (FromBytesPerElement < BytesPerElement)
5335 return false;
5336
5337 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5338 (FromBytesPerElement - BytesPerElement));
5339
5340 // Look through things like shuffles and bitcasts.
5341 while (Op.getNode()) {
5342 if (Op.getOpcode() == ISD::BITCAST)
5343 Op = Op.getOperand(0);
5344 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5345 // See whether the bytes we need come from a contiguous part of one
5346 // operand.
5347 SmallVector<int, SystemZ::VectorBytes> OpBytes;
5348 if (!getVPermMask(Op, OpBytes))
5349 break;
5350 int NewByte;
5351 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5352 break;
5353 if (NewByte < 0) {
5354 addUndef();
5355 return true;
5356 }
5357 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
5358 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
5359 } else if (Op.isUndef()) {
5360 addUndef();
5361 return true;
5362 } else
5363 break;
5364 }
5365
5366 // Make sure that the source of the extraction is in Ops.
5367 unsigned OpNo = 0;
5368 for (; OpNo < Ops.size(); ++OpNo)
5369 if (Ops[OpNo] == Op)
5370 break;
5371 if (OpNo == Ops.size())
5372 Ops.push_back(Op);
5373
5374 // Add the element to Bytes.
5375 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
5376 for (unsigned I = 0; I < BytesPerElement; ++I)
5377 Bytes.push_back(Base + I);
5378
5379 return true;
5380}
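
// Worked example for the index arithmetic above: extracting element 3 of a
// source vector with 4-byte elements into a shuffle with 2-byte elements
// gives Byte = (3 * 4) % 16 + (4 - 2) == 14, i.e. the two least-significant
// bytes of that (big-endian) source element.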
5381
5382// Return SDNodes for the completed shuffle.
5383SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
5384 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
5385
5386 if (Ops.size() == 0)
5387 return DAG.getUNDEF(VT);
5388
5389 // Use a single unpack if possible as the last operation.
5390 tryPrepareForUnpack();
5391
5392 // Make sure that there are at least two shuffle operands.
5393 if (Ops.size() == 1)
5394 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
5395
5396 // Create a tree of shuffles, deferring root node until after the loop.
5397 // Try to redistribute the undefined elements of non-root nodes so that
5398 // the non-root shuffles match something like a pack or merge, then adjust
5399 // the parent node's permute vector to compensate for the new order.
5400 // Among other things, this copes with vectors like <2 x i16> that were
5401 // padded with undefined elements during type legalization.
5402 //
5403 // In the best case this redistribution will lead to the whole tree
5404 // using packs and merges. It should rarely be a loss in other cases.
5405 unsigned Stride = 1;
5406 for (; Stride * 2 < Ops.size(); Stride *= 2) {
5407 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
5408 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
5409
5410 // Create a mask for just these two operands.
5411 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
5412 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5413 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
5414 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
5415 if (OpNo == I)
5416 NewBytes[J] = Byte;
5417 else if (OpNo == I + Stride)
5418 NewBytes[J] = SystemZ::VectorBytes + Byte;
5419 else
5420 NewBytes[J] = -1;
5421 }
5422 // See if it would be better to reorganize NewMask to avoid using VPERM.
5423 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
5424 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
5425 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
5426 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
5427 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5428 if (NewBytes[J] >= 0) {
5429 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
5430 "Invalid double permute");
5431 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
5432 } else
5433 assert(NewBytesMap[J] < 0 && "Invalid double permute");
5434 }
5435 } else {
5436 // Just use NewBytes on the operands.
5437 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
5438 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
5439 if (NewBytes[J] >= 0)
5440 Bytes[J] = I * SystemZ::VectorBytes + J;
5441 }
5442 }
5443 }
5444
5445 // Now we just have 2 inputs. Put the second operand in Ops[1].
5446 if (Stride > 1) {
5447 Ops[1] = Ops[Stride];
5448 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5449 if (Bytes[I] >= int(SystemZ::VectorBytes))
5450 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
5451 }
5452
5453 // Look for an instruction that can do the permute without resorting
5454 // to VPERM.
5455 unsigned OpNo0, OpNo1;
5456 SDValue Op;
5457 if (unpackWasPrepared() && Ops[1].isUndef())
5458 Op = Ops[0];
5459 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
5460 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
5461 else
5462 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
5463
5464 Op = insertUnpackIfPrepared(DAG, DL, Op);
5465
5466 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5467}
5468
5469#ifndef NDEBUG
5470static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
5471 dbgs() << Msg.c_str() << " { ";
5472 for (unsigned i = 0; i < Bytes.size(); i++)
5473 dbgs() << Bytes[i] << " ";
5474 dbgs() << "}\n";
5475}
5476#endif
5477
5478// If the Bytes vector matches an unpack operation, prepare to do the unpack
5479// as the last step by removing the zero vector from Ops and undoing the
5480// effect of the unpack on Bytes.
5481void GeneralShuffle::tryPrepareForUnpack() {
5482 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
5483 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
5484 return;
5485
5486 // Only do this if removing the zero vector reduces the depth, otherwise
5487 // the critical path will increase with the final unpack.
5488 if (Ops.size() > 2 &&
5489 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
5490 return;
5491
5492 // Find an unpack that would allow removing the zero vector from Ops.
5493 UnpackFromEltSize = 1;
5494 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
5495 bool MatchUnpack = true;
5496 SmallVector<int, SystemZ::VectorBytes> SrcBytes;
5497 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
5498 unsigned ToEltSize = UnpackFromEltSize * 2;
5499 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
5500 if (!IsZextByte)
5501 SrcBytes.push_back(Bytes[Elt]);
5502 if (Bytes[Elt] != -1) {
5503 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
5504 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
5505 MatchUnpack = false;
5506 break;
5507 }
5508 }
5509 }
5510 if (MatchUnpack) {
5511 if (Ops.size() == 2) {
5512 // Don't use unpack if a single source operand needs rearrangement.
5513 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
5514 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
5515 UnpackFromEltSize = UINT_MAX;
5516 return;
5517 }
5518 }
5519 break;
5520 }
5521 }
5522 if (UnpackFromEltSize > 4)
5523 return;
5524
5525 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
5526 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
5527 << ".\n";
5528 dumpBytes(Bytes, "Original Bytes vector:"););
5529
5530 // Apply the unpack in reverse to the Bytes array.
5531 unsigned B = 0;
5532 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
5533 Elt += UnpackFromEltSize;
5534 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
5535 Bytes[B] = Bytes[Elt];
5536 }
5537 while (B < SystemZ::VectorBytes)
5538 Bytes[B++] = -1;
5539
5540 // Remove the zero vector from Ops
5541 Ops.erase(&Ops[ZeroVecOpNo]);
5542 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5543 if (Bytes[I] >= 0) {
5544 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5545 if (OpNo > ZeroVecOpNo)
5546 Bytes[I] -= SystemZ::VectorBytes;
5547 }
5548
5549 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
5550 dbgs() << "\n";);
5551}
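
// Worked example: for UnpackFromEltSize == 2 the final unpack widens 2-byte
// elements to 4 bytes, so every result byte with (Elt % 4) < 2 must come from
// the zero vector. A matching Bytes = { Z, Z, a, b, Z, Z, c, d, ... } (Z =
// zero-vector byte) is rewritten above to { a, b, c, d, ..., -1, ... } and the
// zero vector is dropped from Ops; insertUnpackIfPrepared() re-creates the
// zero bytes afterwards with UNPACKL_HIGH.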
5552
5553SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
5554 const SDLoc &DL,
5555 SDValue Op) {
5556 if (!unpackWasPrepared())
5557 return Op;
5558 unsigned InBits = UnpackFromEltSize * 8;
5559 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
5560 SystemZ::VectorBits / InBits);
5561 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
5562 unsigned OutBits = InBits * 2;
5563 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
5564 SystemZ::VectorBits / OutBits);
5565 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
5566}
5567
5568// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
5569static bool isScalarToVector(SDValue Op) {
5570 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
5571 if (!Op.getOperand(I).isUndef())
5572 return false;
5573 return true;
5574}
5575
5576// Return a vector of type VT that contains Value in the first element.
5577// The other elements don't matter.
5578static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5579 SDValue Value) {
5580 // If we have a constant, replicate it to all elements and let the
5581 // BUILD_VECTOR lowering take care of it.
5582 if (Value.getOpcode() == ISD::Constant ||
5583 Value.getOpcode() == ISD::ConstantFP) {
5584 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
5585 return DAG.getBuildVector(VT, DL, Ops);
5586 }
5587 if (Value.isUndef())
5588 return DAG.getUNDEF(VT);
5589 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
5590}
5591
5592// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
5593// element 1. Used for cases in which replication is cheap.
5594static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5595 SDValue Op0, SDValue Op1) {
5596 if (Op0.isUndef()) {
5597 if (Op1.isUndef())
5598 return DAG.getUNDEF(VT);
5599 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
5600 }
5601 if (Op1.isUndef())
5602 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
5603 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
5604 buildScalarToVector(DAG, DL, VT, Op0),
5605 buildScalarToVector(DAG, DL, VT, Op1));
5606}
5607
5608// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
5609// vector for them.
5610static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
5611 SDValue Op1) {
5612 if (Op0.isUndef() && Op1.isUndef())
5613 return DAG.getUNDEF(MVT::v2i64);
5614 // If one of the two inputs is undefined then replicate the other one,
5615 // in order to avoid using another register unnecessarily.
5616 if (Op0.isUndef())
5617 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5618 else if (Op1.isUndef())
5619 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5620 else {
5621 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5622 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5623 }
5624 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
5625}
5626
5627// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
5628// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
5629// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
5630// would benefit from this representation and return it if so.
5631static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
5632 BuildVectorSDNode *BVN) {
5633 EVT VT = BVN->getValueType(0);
5634 unsigned NumElements = VT.getVectorNumElements();
5635
5636 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
5637 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
5638 // need a BUILD_VECTOR, add an additional placeholder operand for that
5639 // BUILD_VECTOR and store its operands in ResidueOps.
5640 GeneralShuffle GS(VT);
5641 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
5642 bool FoundOne = false;
5643 for (unsigned I = 0; I < NumElements; ++I) {
5644 SDValue Op = BVN->getOperand(I);
5645 if (Op.getOpcode() == ISD::TRUNCATE)
5646 Op = Op.getOperand(0);
5647 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5648 Op.getOperand(1).getOpcode() == ISD::Constant) {
5649 unsigned Elem = Op.getConstantOperandVal(1);
5650 if (!GS.add(Op.getOperand(0), Elem))
5651 return SDValue();
5652 FoundOne = true;
5653 } else if (Op.isUndef()) {
5654 GS.addUndef();
5655 } else {
5656 if (!GS.add(SDValue(), ResidueOps.size()))
5657 return SDValue();
5658 ResidueOps.push_back(BVN->getOperand(I));
5659 }
5660 }
5661
5662 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
5663 if (!FoundOne)
5664 return SDValue();
5665
5666 // Create the BUILD_VECTOR for the remaining elements, if any.
5667 if (!ResidueOps.empty()) {
5668 while (ResidueOps.size() < NumElements)
5669 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
5670 for (auto &Op : GS.Ops) {
5671 if (!Op.getNode()) {
5672 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
5673 break;
5674 }
5675 }
5676 }
5677 return GS.getNode(DAG, SDLoc(BVN));
5678}
5679
5680bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
5681 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
5682 return true;
5683 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
5684 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
5685 return true;
5686 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
5687 return true;
5688 return false;
5689}
5690
5691// Combine GPR scalar values Elems into a vector of type VT.
5692SDValue
5693SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5694 SmallVectorImpl<SDValue> &Elems) const {
5695 // See whether there is a single replicated value.
5696 SDValue Single;
5697 unsigned int NumElements = Elems.size();
5698 unsigned int Count = 0;
5699 for (auto Elem : Elems) {
5700 if (!Elem.isUndef()) {
5701 if (!Single.getNode())
5702 Single = Elem;
5703 else if (Elem != Single) {
5704 Single = SDValue();
5705 break;
5706 }
5707 Count += 1;
5708 }
5709 }
5710 // There are three cases here:
5711 //
5712 // - if the only defined element is a loaded one, the best sequence
5713 // is a replicating load.
5714 //
5715 // - otherwise, if the only defined element is an i64 value, we will
5716 // end up with the same VLVGP sequence regardless of whether we short-cut
5717 // for replication or fall through to the later code.
5718 //
5719 // - otherwise, if the only defined element is an i32 or smaller value,
5720 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
5721 // This is only a win if the single defined element is used more than once.
5722 // In other cases we're better off using a single VLVGx.
5723 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
5724 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
5725
5726 // If all elements are loads, use VLREP/VLEs (below).
5727 bool AllLoads = true;
5728 for (auto Elem : Elems)
5729 if (!isVectorElementLoad(Elem)) {
5730 AllLoads = false;
5731 break;
5732 }
5733
5734 // The best way of building a v2i64 from two i64s is to use VLVGP.
5735 if (VT == MVT::v2i64 && !AllLoads)
5736 return joinDwords(DAG, DL, Elems[0], Elems[1]);
5737
5738 // Use a 64-bit merge high to combine two doubles.
5739 if (VT == MVT::v2f64 && !AllLoads)
5740 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5741
5742 // Build v4f32 values directly from the FPRs:
5743 //
5744 // <Axxx> <Bxxx> <Cxxx> <Dxxx>
5745 // V V VMRHF
5746 // <ABxx> <CDxx>
5747 // V VMRHG
5748 // <ABCD>
5749 if (VT == MVT::v4f32 && !AllLoads) {
5750 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5751 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
5752 // Avoid unnecessary undefs by reusing the other operand.
5753 if (Op01.isUndef())
5754 Op01 = Op23;
5755 else if (Op23.isUndef())
5756 Op23 = Op01;
5757 // Merging identical replications is a no-op.
5758 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
5759 return Op01;
5760 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
5761 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
5762 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
5763 DL, MVT::v2i64, Op01, Op23);
5764 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5765 }
5766
5767 // Collect the constant terms.
5768 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
5769 SmallVector<bool, 16> Done(NumElements, false);
5770
5771 unsigned NumConstants = 0;
5772 for (unsigned I = 0; I < NumElements; ++I) {
5773 SDValue Elem = Elems[I];
5774 if (Elem.getOpcode() == ISD::Constant ||
5775 Elem.getOpcode() == ISD::ConstantFP) {
5776 NumConstants += 1;
5777 Constants[I] = Elem;
5778 Done[I] = true;
5779 }
5780 }
5781 // If there was at least one constant, fill in the other elements of
5782 // Constants with undefs to get a full vector constant and use that
5783 // as the starting point.
5784 SDValue Result;
5785 SDValue ReplicatedVal;
5786 if (NumConstants > 0) {
5787 for (unsigned I = 0; I < NumElements; ++I)
5788 if (!Constants[I].getNode())
5789 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
5790 Result = DAG.getBuildVector(VT, DL, Constants);
5791 } else {
5792 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
5793 // avoid a false dependency on any previous contents of the vector
5794 // register.
5795
5796 // Use a VLREP if at least one element is a load. Make sure to replicate
5797 // the load with the most elements having its value.
5798 std::map<const SDNode*, unsigned> UseCounts;
5799 SDNode *LoadMaxUses = nullptr;
5800 for (unsigned I = 0; I < NumElements; ++I)
5801 if (isVectorElementLoad(Elems[I])) {
5802 SDNode *Ld = Elems[I].getNode();
5803 UseCounts[Ld]++;
5804 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
5805 LoadMaxUses = Ld;
5806 }
5807 if (LoadMaxUses != nullptr) {
5808 ReplicatedVal = SDValue(LoadMaxUses, 0);
5809 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
5810 } else {
5811 // Try to use VLVGP.
5812 unsigned I1 = NumElements / 2 - 1;
5813 unsigned I2 = NumElements - 1;
5814 bool Def1 = !Elems[I1].isUndef();
5815 bool Def2 = !Elems[I2].isUndef();
5816 if (Def1 || Def2) {
5817 SDValue Elem1 = Elems[Def1 ? I1 : I2];
5818 SDValue Elem2 = Elems[Def2 ? I2 : I1];
5819 Result = DAG.getNode(ISD::BITCAST, DL, VT,
5820 joinDwords(DAG, DL, Elem1, Elem2));
5821 Done[I1] = true;
5822 Done[I2] = true;
5823 } else
5824 Result = DAG.getUNDEF(VT);
5825 }
5826 }
5827
5828 // Use VLVGx to insert the other elements.
5829 for (unsigned I = 0; I < NumElements; ++I)
5830 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
5831 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
5832 DAG.getConstant(I, DL, MVT::i32));
5833 return Result;
5834}
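
// For example, a v4i32 whose defined elements are all the same non-constant
// GPR value (used more than once) becomes a single REPLICATE, whereas four
// distinct GPR values start with VLVGP (joinDwords fills elements 1 and 3)
// and the remaining elements 0 and 2 are then set with VLVGx insertions.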
5835
5836SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
5837 SelectionDAG &DAG) const {
5838 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
5839 SDLoc DL(Op);
5840 EVT VT = Op.getValueType();
5841
5842 if (BVN->isConstant()) {
5843 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
5844 return Op;
5845
5846 // Fall back to loading it from memory.
5847 return SDValue();
5848 }
5849
5850 // See if we should use shuffles to construct the vector from other vectors.
5851 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
5852 return Res;
5853
5854 // Detect SCALAR_TO_VECTOR conversions.
5855 if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
5856 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
5857
5858 // Otherwise use buildVector to build the vector up from GPRs.
5859 unsigned NumElements = Op.getNumOperands();
5860 SmallVector<SDValue, 16> Ops(NumElements);
5861 for (unsigned I = 0; I < NumElements; ++I)
5862 Ops[I] = Op.getOperand(I);
5863 return buildVector(DAG, DL, VT, Ops);
5864}
5865
5866SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
5867 SelectionDAG &DAG) const {
5868 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
5869 SDLoc DL(Op);
5870 EVT VT = Op.getValueType();
5871 unsigned NumElements = VT.getVectorNumElements();
5872
5873 if (VSN->isSplat()) {
5874 SDValue Op0 = Op.getOperand(0);
5875 unsigned Index = VSN->getSplatIndex();
5876 assert(Index < VT.getVectorNumElements() &&
5877 "Splat index should be defined and in first operand");
5878 // See whether the value we're splatting is directly available as a scalar.
5879 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5880 Op0.getOpcode() == ISD::BUILD_VECTOR)
5881 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
5882 // Otherwise keep it as a vector-to-vector operation.
5883 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
5884 DAG.getTargetConstant(Index, DL, MVT::i32));
5885 }
5886
5887 GeneralShuffle GS(VT);
5888 for (unsigned I = 0; I < NumElements; ++I) {
5889 int Elt = VSN->getMaskElt(I);
5890 if (Elt < 0)
5891 GS.addUndef();
5892 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
5893 unsigned(Elt) % NumElements))
5894 return SDValue();
5895 }
5896 return GS.getNode(DAG, SDLoc(VSN));
5897}
5898
5899SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
5900 SelectionDAG &DAG) const {
5901 SDLoc DL(Op);
5902 // Just insert the scalar into element 0 of an undefined vector.
5903 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
5904 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
5905 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
5906}
5907
5908SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
5909 SelectionDAG &DAG) const {
5910 // Handle insertions of floating-point values.
5911 SDLoc DL(Op);
5912 SDValue Op0 = Op.getOperand(0);
5913 SDValue Op1 = Op.getOperand(1);
5914 SDValue Op2 = Op.getOperand(2);
5915 EVT VT = Op.getValueType();
5916
5917 // Insertions into constant indices of a v2f64 can be done using VPDI.
5918 // However, if the inserted value is a bitcast or a constant then it's
5919 // better to use GPRs, as below.
5920 if (VT == MVT::v2f64 &&
5921 Op1.getOpcode() != ISD::BITCAST &&
5922 Op1.getOpcode() != ISD::ConstantFP &&
5923 Op2.getOpcode() == ISD::Constant) {
5924 uint64_t Index = Op2->getAsZExtVal();
5925 unsigned Mask = VT.getVectorNumElements() - 1;
5926 if (Index <= Mask)
5927 return Op;
5928 }
5929
5930 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
5931 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
5932 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
5933 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
5934 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
5935 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
5936 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5937}
5938
5939SDValue
5940SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
5941 SelectionDAG &DAG) const {
5942 // Handle extractions of floating-point values.
5943 SDLoc DL(Op);
5944 SDValue Op0 = Op.getOperand(0);
5945 SDValue Op1 = Op.getOperand(1);
5946 EVT VT = Op.getValueType();
5947 EVT VecVT = Op0.getValueType();
5948
5949 // Extractions of constant indices can be done directly.
5950 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
5951 uint64_t Index = CIndexN->getZExtValue();
5952 unsigned Mask = VecVT.getVectorNumElements() - 1;
5953 if (Index <= Mask)
5954 return Op;
5955 }
5956
5957 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
5958 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
5959 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
5960 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
5961 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
5962 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5963}
5964
5965SDValue SystemZTargetLowering::
5966lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5967 SDValue PackedOp = Op.getOperand(0);
5968 EVT OutVT = Op.getValueType();
5969 EVT InVT = PackedOp.getValueType();
5970 unsigned ToBits = OutVT.getScalarSizeInBits();
5971 unsigned FromBits = InVT.getScalarSizeInBits();
5972 do {
5973 FromBits *= 2;
5974 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
5975 SystemZ::VectorBits / FromBits);
5976 PackedOp =
5977 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
5978 } while (FromBits != ToBits);
5979 return PackedOp;
5980}
5981
5982// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
5983SDValue SystemZTargetLowering::
5984lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5985 SDValue PackedOp = Op.getOperand(0);
5986 SDLoc DL(Op);
5987 EVT OutVT = Op.getValueType();
5988 EVT InVT = PackedOp.getValueType();
5989 unsigned InNumElts = InVT.getVectorNumElements();
5990 unsigned OutNumElts = OutVT.getVectorNumElements();
5991 unsigned NumInPerOut = InNumElts / OutNumElts;
5992
5993 SDValue ZeroVec =
5994 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
5995
5996 SmallVector<int, 16> Mask(InNumElts);
5997 unsigned ZeroVecElt = InNumElts;
5998 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
5999 unsigned MaskElt = PackedElt * NumInPerOut;
6000 unsigned End = MaskElt + NumInPerOut - 1;
6001 for (; MaskElt < End; MaskElt++)
6002 Mask[MaskElt] = ZeroVecElt++;
6003 Mask[MaskElt] = PackedElt;
6004 }
6005 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6006 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6007}
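
// Worked example: zero-extending v8i16 in-register to v4i32 has
// NumInPerOut == 2 and produces Mask = { 8, 0, 9, 1, 10, 2, 11, 3 }: each
// i32 result element is a zero i16 taken from ZeroVec (shuffle elements
// 8..15) followed by the packed i16 value, matching big-endian element order.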
6008
6009SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6010 unsigned ByScalar) const {
6011 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6012 SDValue Op0 = Op.getOperand(0);
6013 SDValue Op1 = Op.getOperand(1);
6014 SDLoc DL(Op);
6015 EVT VT = Op.getValueType();
6016 unsigned ElemBitSize = VT.getScalarSizeInBits();
6017
6018 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6019 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6020 APInt SplatBits, SplatUndef;
6021 unsigned SplatBitSize;
6022 bool HasAnyUndefs;
6023 // Check for constant splats. Use ElemBitSize as the minimum element
6024 // width and reject splats that need wider elements.
6025 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6026 ElemBitSize, true) &&
6027 SplatBitSize == ElemBitSize) {
6028 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6029 DL, MVT::i32);
6030 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6031 }
6032 // Check for variable splats.
6033 BitVector UndefElements;
6034 SDValue Splat = BVN->getSplatValue(&UndefElements);
6035 if (Splat) {
6036 // Since i32 is the smallest legal type, we either need a no-op
6037 // or a truncation.
6038 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6039 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6040 }
6041 }
6042
6043 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6044 // and the shift amount is directly available in a GPR.
6045 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6046 if (VSN->isSplat()) {
6047 SDValue VSNOp0 = VSN->getOperand(0);
6048 unsigned Index = VSN->getSplatIndex();
6049 assert(Index < VT.getVectorNumElements() &&
6050 "Splat index should be defined and in first operand");
6051 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6052 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6053 // Since i32 is the smallest legal type, we either need a no-op
6054 // or a truncation.
6055 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6056 VSNOp0.getOperand(Index));
6057 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6058 }
6059 }
6060 }
6061
6062 // Otherwise just treat the current form as legal.
6063 return Op;
6064}
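
// For example, a v4i32 shift whose amount is the constant splat <5, 5, 5, 5>
// is lowered here to the *_BY_SCALAR opcode with an i32 amount of 5; a splat
// of a variable amount instead goes through the TRUNCATE path so that the
// amount ends up in a GPR, which is what the BY_SCALAR forms expect.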
6065
6066SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
6067 SelectionDAG &DAG) const {
6068 SDLoc DL(Op);
6069 MVT ResultVT = Op.getSimpleValueType();
6070 SDValue Arg = Op.getOperand(0);
6071 unsigned Check = Op.getConstantOperandVal(1);
6072
6073 unsigned TDCMask = 0;
6074 if (Check & fcSNan)
6075 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
6076 if (Check & fcQNan)
6077 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
6078 if (Check & fcPosInf)
6079 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
6080 if (Check & fcNegInf)
6081 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
6082 if (Check & fcPosNormal)
6083 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
6084 if (Check & fcNegNormal)
6085 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
6086 if (Check & fcPosSubnormal)
6087 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
6088 if (Check & fcNegSubnormal)
6089 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
6090 if (Check & fcPosZero)
6091 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
6092 if (Check & fcNegZero)
6093 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
6094 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
6095
6096 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
6097 return getCCResult(DAG, Intr);
6098}
6099
6100SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
6101 SelectionDAG &DAG) const {
6102 SDLoc DL(Op);
6103 SDValue Chain = Op.getOperand(0);
6104
6105 // STCKF only supports a memory operand, so we have to use a temporary.
6106 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
6107 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6108 MachinePointerInfo MPI =
6109 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
6110
6111 // Use STCKF to store the TOD clock into the temporary.
6112 SDValue StoreOps[] = {Chain, StackPtr};
6113 Chain = DAG.getMemIntrinsicNode(
6114 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
6115 MPI, MaybeAlign(), MachineMemOperand::MOStore);
6116
6117 // And read it back from there.
6118 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
6119}
6120
6121SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
6122 SelectionDAG &DAG) const {
6123 switch (Op.getOpcode()) {
6124 case ISD::FRAMEADDR:
6125 return lowerFRAMEADDR(Op, DAG);
6126 case ISD::RETURNADDR:
6127 return lowerRETURNADDR(Op, DAG);
6128 case ISD::BR_CC:
6129 return lowerBR_CC(Op, DAG);
6130 case ISD::SELECT_CC:
6131 return lowerSELECT_CC(Op, DAG);
6132 case ISD::SETCC:
6133 return lowerSETCC(Op, DAG);
6134 case ISD::STRICT_FSETCC:
6135 return lowerSTRICT_FSETCC(Op, DAG, false);
6136 case ISD::STRICT_FSETCCS:
6137 return lowerSTRICT_FSETCC(Op, DAG, true);
6138 case ISD::GlobalAddress:
6139 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
6140 case ISD::GlobalTLSAddress:
6141 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
6142 case ISD::BlockAddress:
6143 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
6144 case ISD::JumpTable:
6145 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
6146 case ISD::ConstantPool:
6147 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
6148 case ISD::BITCAST:
6149 return lowerBITCAST(Op, DAG);
6150 case ISD::VASTART:
6151 return lowerVASTART(Op, DAG);
6152 case ISD::VACOPY:
6153 return lowerVACOPY(Op, DAG);
6154 case ISD::DYNAMIC_STACKALLOC:
6155 return lowerDYNAMIC_STACKALLOC(Op, DAG);
6156 case ISD::GET_DYNAMIC_AREA_OFFSET:
6157 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
6158 case ISD::SMUL_LOHI:
6159 return lowerSMUL_LOHI(Op, DAG);
6160 case ISD::UMUL_LOHI:
6161 return lowerUMUL_LOHI(Op, DAG);
6162 case ISD::SDIVREM:
6163 return lowerSDIVREM(Op, DAG);
6164 case ISD::UDIVREM:
6165 return lowerUDIVREM(Op, DAG);
6166 case ISD::SADDO:
6167 case ISD::SSUBO:
6168 case ISD::UADDO:
6169 case ISD::USUBO:
6170 return lowerXALUO(Op, DAG);
6171 case ISD::UADDO_CARRY:
6172 case ISD::USUBO_CARRY:
6173 return lowerUADDSUBO_CARRY(Op, DAG);
6174 case ISD::OR:
6175 return lowerOR(Op, DAG);
6176 case ISD::CTPOP:
6177 return lowerCTPOP(Op, DAG);
6178 case ISD::VECREDUCE_ADD:
6179 return lowerVECREDUCE_ADD(Op, DAG);
6180 case ISD::ATOMIC_FENCE:
6181 return lowerATOMIC_FENCE(Op, DAG);
6182 case ISD::ATOMIC_SWAP:
6183 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
6184 case ISD::ATOMIC_STORE:
6185 case ISD::ATOMIC_LOAD:
6186 return lowerATOMIC_LDST_I128(Op, DAG);
6187 case ISD::ATOMIC_LOAD_ADD:
6188 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
6189 case ISD::ATOMIC_LOAD_SUB:
6190 return lowerATOMIC_LOAD_SUB(Op, DAG);
6191 case ISD::ATOMIC_LOAD_AND:
6192 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
6193 case ISD::ATOMIC_LOAD_OR:
6194 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
6195 case ISD::ATOMIC_LOAD_XOR:
6196 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
6197 case ISD::ATOMIC_LOAD_NAND:
6198 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
6199 case ISD::ATOMIC_LOAD_MIN:
6200 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
6201 case ISD::ATOMIC_LOAD_MAX:
6202 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
6203 case ISD::ATOMIC_LOAD_UMIN:
6204 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
6205 case ISD::ATOMIC_LOAD_UMAX:
6206 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
6207 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
6208 return lowerATOMIC_CMP_SWAP(Op, DAG);
6209 case ISD::STACKSAVE:
6210 return lowerSTACKSAVE(Op, DAG);
6211 case ISD::STACKRESTORE:
6212 return lowerSTACKRESTORE(Op, DAG);
6213 case ISD::PREFETCH:
6214 return lowerPREFETCH(Op, DAG);
6215 case ISD::INTRINSIC_W_CHAIN:
6216 return lowerINTRINSIC_W_CHAIN(Op, DAG);
6217 case ISD::INTRINSIC_WO_CHAIN:
6218 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
6219 case ISD::BUILD_VECTOR:
6220 return lowerBUILD_VECTOR(Op, DAG);
6221 case ISD::VECTOR_SHUFFLE:
6222 return lowerVECTOR_SHUFFLE(Op, DAG);
6223 case ISD::SCALAR_TO_VECTOR:
6224 return lowerSCALAR_TO_VECTOR(Op, DAG);
6225 case ISD::INSERT_VECTOR_ELT:
6226 return lowerINSERT_VECTOR_ELT(Op, DAG);
6227 case ISD::EXTRACT_VECTOR_ELT:
6228 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6229 case ISD::SIGN_EXTEND_VECTOR_INREG:
6230 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
6231 case ISD::ZERO_EXTEND_VECTOR_INREG:
6232 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
6233 case ISD::SHL:
6234 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
6235 case ISD::SRL:
6236 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
6237 case ISD::SRA:
6238 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
6239 case ISD::ROTL:
6240 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
6241 case ISD::IS_FPCLASS:
6242 return lowerIS_FPCLASS(Op, DAG);
6243 case ISD::GET_ROUNDING:
6244 return lowerGET_ROUNDING(Op, DAG);
6245 case ISD::READCYCLECOUNTER:
6246 return lowerREADCYCLECOUNTER(Op, DAG);
6247 default:
6248 llvm_unreachable("Unexpected node to lower");
6249 }
6250}
6251
6252static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
6253 const SDLoc &SL) {
6254 // If i128 is legal, just use a normal bitcast.
6255 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
6256 return DAG.getBitcast(MVT::f128, Src);
6257
6258 // Otherwise, f128 must live in FP128, so do a partwise move.
6259 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
6260 &SystemZ::FP128BitRegClass);
6261
6262 SDValue Hi, Lo;
6263 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
6264
6265 Hi = DAG.getBitcast(MVT::f64, Hi);
6266 Lo = DAG.getBitcast(MVT::f64, Lo);
6267
6268 SDNode *Pair = DAG.getMachineNode(
6269 SystemZ::REG_SEQUENCE, SL, MVT::f128,
6270 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
6271 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
6272 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
6273 return SDValue(Pair, 0);
6274}
6275
6276static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
6277 const SDLoc &SL) {
6278 // If i128 is legal, just use a normal bitcast.
6279 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
6280 return DAG.getBitcast(MVT::i128, Src);
6281
6282 // Otherwise, f128 must live in FP128, so do a partwise move.
6283 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
6284 &SystemZ::FP128BitRegClass);
6285
6286 SDValue LoFP =
6287 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
6288 SDValue HiFP =
6289 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
6290 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
6291 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
6292
6293 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
6294}
6295
6296// Lower operations with invalid operand or result types (currently used
6297// only for 128-bit integer types).
6298void
6299SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
6300 SmallVectorImpl<SDValue> &Results,
6301 SelectionDAG &DAG) const {
6302 switch (N->getOpcode()) {
6303 case ISD::ATOMIC_LOAD: {
6304 SDLoc DL(N);
6305 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
6306 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
6307 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6308 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
6309 DL, Tys, Ops, MVT::i128, MMO);
6310
6311 SDValue Lowered = lowerGR128ToI128(DAG, Res);
6312 if (N->getValueType(0) == MVT::f128)
6313 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
6314 Results.push_back(Lowered);
6315 Results.push_back(Res.getValue(1));
6316 break;
6317 }
6318 case ISD::ATOMIC_STORE: {
6319 SDLoc DL(N);
6320 SDVTList Tys = DAG.getVTList(MVT::Other);
6321 SDValue Val = N->getOperand(1);
6322 if (Val.getValueType() == MVT::f128)
6323 Val = expandBitCastF128ToI128(DAG, Val, DL);
6324 Val = lowerI128ToGR128(DAG, Val);
6325
6326 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
6327 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6328 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
6329 DL, Tys, Ops, MVT::i128, MMO);
6330 // We have to enforce sequential consistency by performing a
6331 // serialization operation after the store.
6332 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
6333 AtomicOrdering::SequentiallyConsistent)
6334 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
6335 MVT::Other, Res), 0);
6336 Results.push_back(Res);
6337 break;
6338 }
6339 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
6340 SDLoc DL(N);
6341 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
6342 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
6343 lowerI128ToGR128(DAG, N->getOperand(2)),
6344 lowerI128ToGR128(DAG, N->getOperand(3)) };
6345 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6346 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
6347 DL, Tys, Ops, MVT::i128, MMO);
6348 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
6349 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
6350 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
6351 Results.push_back(lowerGR128ToI128(DAG, Res));
6352 Results.push_back(Success);
6353 Results.push_back(Res.getValue(2));
6354 break;
6355 }
6356 case ISD::BITCAST: {
6357 SDValue Src = N->getOperand(0);
6358 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
6359 !useSoftFloat()) {
6360 SDLoc DL(N);
6361 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
6362 }
6363 break;
6364 }
6365 default:
6366 llvm_unreachable("Unexpected node to lower");
6367 }
6368}
6369
6370void
6371SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
6372 SmallVectorImpl<SDValue> &Results,
6373 SelectionDAG &DAG) const {
6374 return LowerOperationWrapper(N, Results, DAG);
6375}
6376
6377const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
6378#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
6379 switch ((SystemZISD::NodeType)Opcode) {
6380 case SystemZISD::FIRST_NUMBER: break;
6381 OPCODE(RET_GLUE);
6382 OPCODE(CALL);
6383 OPCODE(SIBCALL);
6384 OPCODE(TLS_GDCALL);
6385 OPCODE(TLS_LDCALL);
6386 OPCODE(PCREL_WRAPPER);
6387 OPCODE(PCREL_OFFSET);
6388 OPCODE(ICMP);
6389 OPCODE(FCMP);
6390 OPCODE(STRICT_FCMP);
6391 OPCODE(STRICT_FCMPS);
6392 OPCODE(TM);
6393 OPCODE(BR_CCMASK);
6394 OPCODE(SELECT_CCMASK);
6395 OPCODE(ADJDYNALLOC);
6396 OPCODE(PROBED_ALLOCA);
6397 OPCODE(POPCNT);
6398 OPCODE(SMUL_LOHI);
6399 OPCODE(UMUL_LOHI);
6400 OPCODE(SDIVREM);
6401 OPCODE(UDIVREM);
6402 OPCODE(SADDO);
6403 OPCODE(SSUBO);
6404 OPCODE(UADDO);
6405 OPCODE(USUBO);
6406 OPCODE(ADDCARRY);
6407 OPCODE(SUBCARRY);
6408 OPCODE(GET_CCMASK);
6409 OPCODE(MVC);
6410 OPCODE(NC);
6411 OPCODE(OC);
6412 OPCODE(XC);
6413 OPCODE(CLC);
6414 OPCODE(MEMSET_MVC);
6415 OPCODE(STPCPY);
6416 OPCODE(STRCMP);
6417 OPCODE(SEARCH_STRING);
6418 OPCODE(IPM);
6419 OPCODE(TBEGIN);
6420 OPCODE(TBEGIN_NOFLOAT);
6421 OPCODE(TEND);
6422 OPCODE(BYTE_MASK);
6423 OPCODE(ROTATE_MASK);
6424 OPCODE(REPLICATE);
6425 OPCODE(JOIN_DWORDS);
6426 OPCODE(SPLAT);
6427 OPCODE(MERGE_HIGH);
6428 OPCODE(MERGE_LOW);
6429 OPCODE(SHL_DOUBLE);
6430 OPCODE(PERMUTE_DWORDS);
6431 OPCODE(PERMUTE);
6432 OPCODE(PACK);
6433 OPCODE(PACKS_CC);
6434 OPCODE(PACKLS_CC);
6435 OPCODE(UNPACK_HIGH);
6436 OPCODE(UNPACKL_HIGH);
6437 OPCODE(UNPACK_LOW);
6438 OPCODE(UNPACKL_LOW);
6439 OPCODE(VSHL_BY_SCALAR);
6440 OPCODE(VSRL_BY_SCALAR);
6441 OPCODE(VSRA_BY_SCALAR);
6442 OPCODE(VROTL_BY_SCALAR);
6443 OPCODE(VSUM);
6444 OPCODE(VACC);
6445 OPCODE(VSCBI);
6446 OPCODE(VAC);
6447 OPCODE(VSBI);
6448 OPCODE(VACCC);
6449 OPCODE(VSBCBI);
6450 OPCODE(VICMPE);
6451 OPCODE(VICMPH);
6452 OPCODE(VICMPHL);
6453 OPCODE(VICMPES);
6454 OPCODE(VICMPHS);
6455 OPCODE(VICMPHLS);
6456 OPCODE(VFCMPE);
6457 OPCODE(STRICT_VFCMPE);
6458 OPCODE(STRICT_VFCMPES);
6459 OPCODE(VFCMPH);
6460 OPCODE(STRICT_VFCMPH);
6461 OPCODE(STRICT_VFCMPHS);
6462 OPCODE(VFCMPHE);
6463 OPCODE(STRICT_VFCMPHE);
6464 OPCODE(STRICT_VFCMPHES);
6465 OPCODE(VFCMPES);
6466 OPCODE(VFCMPHS);
6467 OPCODE(VFCMPHES);
6468 OPCODE(VFTCI);
6469 OPCODE(VEXTEND);
6470 OPCODE(STRICT_VEXTEND);
6471 OPCODE(VROUND);
6472 OPCODE(STRICT_VROUND);
6473 OPCODE(VTM);
6474 OPCODE(SCMP128HI);
6475 OPCODE(UCMP128HI);
6476 OPCODE(VFAE_CC);
6477 OPCODE(VFAEZ_CC);
6478 OPCODE(VFEE_CC);
6479 OPCODE(VFEEZ_CC);
6480 OPCODE(VFENE_CC);
6481 OPCODE(VFENEZ_CC);
6482 OPCODE(VISTR_CC);
6483 OPCODE(VSTRC_CC);
6484 OPCODE(VSTRCZ_CC);
6485 OPCODE(VSTRS_CC);
6486 OPCODE(VSTRSZ_CC);
6487 OPCODE(TDC);
6488 OPCODE(ATOMIC_SWAPW);
6489 OPCODE(ATOMIC_LOADW_ADD);
6490 OPCODE(ATOMIC_LOADW_SUB);
6491 OPCODE(ATOMIC_LOADW_AND);
6492 OPCODE(ATOMIC_LOADW_OR);
6493 OPCODE(ATOMIC_LOADW_XOR);
6494 OPCODE(ATOMIC_LOADW_NAND);
6495 OPCODE(ATOMIC_LOADW_MIN);
6496 OPCODE(ATOMIC_LOADW_MAX);
6497 OPCODE(ATOMIC_LOADW_UMIN);
6498 OPCODE(ATOMIC_LOADW_UMAX);
6499 OPCODE(ATOMIC_CMP_SWAPW);
6500 OPCODE(ATOMIC_CMP_SWAP);
6501 OPCODE(ATOMIC_LOAD_128);
6502 OPCODE(ATOMIC_STORE_128);
6503 OPCODE(ATOMIC_CMP_SWAP_128);
6504 OPCODE(LRV);
6505 OPCODE(STRV);
6506 OPCODE(VLER);
6507 OPCODE(VSTER);
6508 OPCODE(STCKF);
6510 OPCODE(ADA_ENTRY);
6511 }
6512 return nullptr;
6513#undef OPCODE
6514}
6515
6516// Return true if VT is a vector whose elements are a whole number of bytes
6517// in width. Also check for presence of vector support.
6518bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
6519 if (!Subtarget.hasVector())
6520 return false;
6521
6522 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
6523}
6524
6525// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
6526// producing a result of type ResVT. Op is a possibly bitcast version
6527// of the input vector and Index is the index (based on type VecVT) that
6528// should be extracted. Return the new extraction if a simplification
6529// was possible or if Force is true.
6530SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
6531 EVT VecVT, SDValue Op,
6532 unsigned Index,
6533 DAGCombinerInfo &DCI,
6534 bool Force) const {
6535 SelectionDAG &DAG = DCI.DAG;
6536
6537 // The number of bytes being extracted.
6538 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6539
6540 for (;;) {
6541 unsigned Opcode = Op.getOpcode();
6542 if (Opcode == ISD::BITCAST)
6543 // Look through bitcasts.
6544 Op = Op.getOperand(0);
6545 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
6546 canTreatAsByteVector(Op.getValueType())) {
6547 // Get a VPERM-like permute mask and see whether the bytes covered
6548 // by the extracted element are a contiguous sequence from one
6549 // source operand.
6550 SmallVector<int, SystemZ::VectorBytes> Bytes;
6551 if (!getVPermMask(Op, Bytes))
6552 break;
6553 int First;
6554 if (!getShuffleInput(Bytes, Index * BytesPerElement,
6555 BytesPerElement, First))
6556 break;
6557 if (First < 0)
6558 return DAG.getUNDEF(ResVT);
6559 // Make sure the contiguous sequence starts at a multiple of the
6560 // original element size.
6561 unsigned Byte = unsigned(First) % Bytes.size();
6562 if (Byte % BytesPerElement != 0)
6563 break;
6564 // We can get the extracted value directly from an input.
6565 Index = Byte / BytesPerElement;
6566 Op = Op.getOperand(unsigned(First) / Bytes.size());
6567 Force = true;
6568 } else if (Opcode == ISD::BUILD_VECTOR &&
6569 canTreatAsByteVector(Op.getValueType())) {
6570 // We can only optimize this case if the BUILD_VECTOR elements are
6571 // at least as wide as the extracted value.
6572 EVT OpVT = Op.getValueType();
6573 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6574 if (OpBytesPerElement < BytesPerElement)
6575 break;
6576 // Make sure that the least-significant bit of the extracted value
6577 // is the least significant bit of an input.
6578 unsigned End = (Index + 1) * BytesPerElement;
6579 if (End % OpBytesPerElement != 0)
6580 break;
6581 // We're extracting the low part of one operand of the BUILD_VECTOR.
6582 Op = Op.getOperand(End / OpBytesPerElement - 1);
6583 if (!Op.getValueType().isInteger()) {
6584 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
6585 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
6586 DCI.AddToWorklist(Op.getNode());
6587 }
6588 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
6589 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
6590 if (VT != ResVT) {
6591 DCI.AddToWorklist(Op.getNode());
6592 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
6593 }
6594 return Op;
6595 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6596 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
6597 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6598 canTreatAsByteVector(Op.getValueType()) &&
6599 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
6600 // Make sure that only the unextended bits are significant.
6601 EVT ExtVT = Op.getValueType();
6602 EVT OpVT = Op.getOperand(0).getValueType();
6603 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
6604 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6605 unsigned Byte = Index * BytesPerElement;
6606 unsigned SubByte = Byte % ExtBytesPerElement;
6607 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
6608 if (SubByte < MinSubByte ||
6609 SubByte + BytesPerElement > ExtBytesPerElement)
6610 break;
6611 // Get the byte offset of the unextended element
6612 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
6613 // ...then add the byte offset relative to that element.
6614 Byte += SubByte - MinSubByte;
6615 if (Byte % BytesPerElement != 0)
6616 break;
6617 Op = Op.getOperand(0);
6618 Index = Byte / BytesPerElement;
6619 Force = true;
6620 } else
6621 break;
6622 }
6623 if (Force) {
6624 if (Op.getValueType() != VecVT) {
6625 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
6626 DCI.AddToWorklist(Op.getNode());
6627 }
6628 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
6629 DAG.getConstant(Index, DL, MVT::i32));
6630 }
6631 return SDValue();
6632}
6633
6634// Optimize vector operations in scalar value Op on the basis that Op
6635// is truncated to TruncVT.
6636SDValue SystemZTargetLowering::combineTruncateExtract(
6637 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
6638 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
6639 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
6640 // of type TruncVT.
6641 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6642 TruncVT.getSizeInBits() % 8 == 0) {
6643 SDValue Vec = Op.getOperand(0);
6644 EVT VecVT = Vec.getValueType();
6645 if (canTreatAsByteVector(VecVT)) {
6646 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
6647 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6648 unsigned TruncBytes = TruncVT.getStoreSize();
6649 if (BytesPerElement % TruncBytes == 0) {
6650 // Calculate the value of Y' in the above description. We are
6651 // splitting the original elements into Scale equal-sized pieces
6652 // and for truncation purposes want the last (least-significant)
6653 // of these pieces for IndexN. This is easiest to do by calculating
6654 // the start index of the following element and then subtracting 1.
6655 unsigned Scale = BytesPerElement / TruncBytes;
6656 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
6657
6658 // Defer the creation of the bitcast from X to combineExtract,
6659 // which might be able to optimize the extraction.
6660 VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
6661 VecVT.getStoreSize() / TruncBytes);
6662 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
6663 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
6664 }
6665 }
6666 }
6667 }
6668 return SDValue();
6669}
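
// Worked example: (i32 (trunc (extract_vector_elt v2i64 X, 1))) has
// BytesPerElement == 8 and TruncBytes == 4, so Scale == 2 and
// NewIndex == (1 + 1) * 2 - 1 == 3; the combine extracts element 3 of
// (v4i32 (bitcast X)), the least-significant word of the original i64
// element on this big-endian target.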
6670
6671SDValue SystemZTargetLowering::combineZERO_EXTEND(
6672 SDNode *N, DAGCombinerInfo &DCI) const {
6673 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
6674 SelectionDAG &DAG = DCI.DAG;
6675 SDValue N0 = N->getOperand(0);
6676 EVT VT = N->getValueType(0);
6677 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
6678 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
6679 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6680 if (TrueOp && FalseOp) {
6681 SDLoc DL(N0);
6682 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
6683 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
6684 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
6685 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
6686 // If N0 has multiple uses, change other uses as well.
6687 if (!N0.hasOneUse()) {
6688 SDValue TruncSelect =
6689 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
6690 DCI.CombineTo(N0.getNode(), TruncSelect);
6691 }
6692 return NewSelect;
6693 }
6694 }
6695 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
6696 // of the result is smaller than the size of X and all the truncated bits
6697 // of X are already zero.
6698 if (N0.getOpcode() == ISD::XOR &&
6699 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
6700 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6701 N0.getOperand(1).getOpcode() == ISD::Constant) {
6702 SDValue X = N0.getOperand(0).getOperand(0);
6703 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
6704 KnownBits Known = DAG.computeKnownBits(X);
6705 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
6706 N0.getValueSizeInBits(),
6707 VT.getSizeInBits());
6708 if (TruncatedBits.isSubsetOf(Known.Zero)) {
6709 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
6710 APInt Mask = N0.getConstantOperandAPInt(1).trunc(VT.getSizeInBits());
6711 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
6712 X, DAG.getConstant(Mask, SDLoc(N0), VT));
6713 }
6714 }
6715 }
6716
6717 return SDValue();
6718}
6719
6720SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
6721 SDNode *N, DAGCombinerInfo &DCI) const {
6722 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
6723 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
6724 // into (select_cc LHS, RHS, -1, 0, COND)
6725 SelectionDAG &DAG = DCI.DAG;
6726 SDValue N0 = N->getOperand(0);
6727 EVT VT = N->getValueType(0);
6728 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
6729 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
6730 N0 = N0.getOperand(0);
6731 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
6732 SDLoc DL(N0);
6733 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
6734 DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
6735 N0.getOperand(2) };
6736 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
6737 }
6738 return SDValue();
6739}
6740
6741SDValue SystemZTargetLowering::combineSIGN_EXTEND(
6742 SDNode *N, DAGCombinerInfo &DCI) const {
6743 // Convert (sext (ashr (shl X, C1), C2)) to
6744 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
6745 // cheap as narrower ones.
6746 SelectionDAG &DAG = DCI.DAG;
6747 SDValue N0 = N->getOperand(0);
6748 EVT VT = N->getValueType(0);
6749 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
6750 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6751 SDValue Inner = N0.getOperand(0);
6752 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
6753 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
6754 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
6755 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
6756 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
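 // Widening both shift amounts by Extra keeps the shifted field at the same
 // distance from the sign bit, so the wider SRA already produces the
 // sign-extended result and no separate sext node is needed.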
6757 EVT ShiftVT = N0.getOperand(1).getValueType();
6758 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
6759 Inner.getOperand(0));
6760 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
6761 DAG.getConstant(NewShlAmt, SDLoc(Inner),
6762 ShiftVT));
6763 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
6764 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
6765 }
6766 }
6767 }
6768
6769 return SDValue();
6770}
6771
6772SDValue SystemZTargetLowering::combineMERGE(
6773 SDNode *N, DAGCombinerInfo &DCI) const {
6774 SelectionDAG &DAG = DCI.DAG;
6775 unsigned Opcode = N->getOpcode();
6776 SDValue Op0 = N->getOperand(0);
6777 SDValue Op1 = N->getOperand(1);
6778 if (Op0.getOpcode() == ISD::BITCAST)
6779 Op0 = Op0.getOperand(0);
6780 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
6781 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
6782 // for v4f32.
6783 if (Op1 == N->getOperand(0))
6784 return Op1;
6785 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
6786 EVT VT = Op1.getValueType();
6787 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
6788 if (ElemBytes <= 4) {
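 // Unpacking doubles the element width, so this is only possible for
 // elements of at most 4 bytes (the result elements must still fit in a
 // 64-bit lane).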
6789 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
6790 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
6791 EVT InVT = VT.changeVectorElementTypeToInteger();
6792 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
6793 SystemZ::VectorBytes / ElemBytes / 2);
6794 if (VT != InVT) {
6795 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
6796 DCI.AddToWorklist(Op1.getNode());
6797 }
6798 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
6799 DCI.AddToWorklist(Op.getNode());
6800 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
6801 }
6802 }
6803 return SDValue();
6804}
6805
6806static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
6807 SDNode *&HiPart) {
6808 LoPart = HiPart = nullptr;
6809
6810 // Scan through all users.
6811 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6812 UI != UIEnd; ++UI) {
6813 // Skip the uses of the chain.
6814 if (UI.getUse().getResNo() != 0)
6815 continue;
6816
6817 // Verify every user is a TRUNCATE to i64 of the low or high half.
6818 SDNode *User = *UI;
6819 bool IsLoPart = true;
6820 if (User->getOpcode() == ISD::SRL &&
6821 User->getOperand(1).getOpcode() == ISD::Constant &&
6822 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
6823 User = *User->use_begin();
6824 IsLoPart = false;
6825 }
6826 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
6827 return false;
6828
6829 if (IsLoPart) {
6830 if (LoPart)
6831 return false;
6832 LoPart = User;
6833 } else {
6834 if (HiPart)
6835 return false;
6836 HiPart = User;
6837 }
6838 }
6839 return true;
6840}
6841
6842static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
6843 SDNode *&HiPart) {
6844 LoPart = HiPart = nullptr;
6845
6846 // Scan through all users.
6847 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6848 UI != UIEnd; ++UI) {
6849 // Skip the uses of the chain.
6850 if (UI.getUse().getResNo() != 0)
6851 continue;
6852
6853 // Verify every user is an EXTRACT_SUBREG of the low or high half.
6854 SDNode *User = *UI;
6855 if (!User->hasOneUse() || !User->isMachineOpcode() ||
6856 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
6857 return false;
6858
6859 switch (User->getConstantOperandVal(1)) {
6860 case SystemZ::subreg_l64:
6861 if (LoPart)
6862 return false;
6863 LoPart = User;
6864 break;
6865 case SystemZ::subreg_h64:
6866 if (HiPart)
6867 return false;
6868 HiPart = User;
6869 break;
6870 default:
6871 return false;
6872 }
6873 }
6874 return true;
6875}
6876
6877SDValue SystemZTargetLowering::combineLOAD(
6878 SDNode *N, DAGCombinerInfo &DCI) const {
6879 SelectionDAG &DAG = DCI.DAG;
6880 EVT LdVT = N->getValueType(0);
6881 SDLoc DL(N);
6882
6883 // Replace a 128-bit load that is used solely to move its value into GPRs
6884 // by separate loads of both halves.
6885 LoadSDNode *LD = cast<LoadSDNode>(N);
6886 if (LD->isSimple() && ISD::isNormalLoad(LD)) {
6887 SDNode *LoPart, *HiPart;
6888 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
6889 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
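 // SystemZ is big-endian, so the high part lives at the base address and
 // the low part at offset 8.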
6890 // Rewrite each extraction as an independent load.
6891 SmallVector<SDValue, 2> ArgChains;
6892 if (HiPart) {
6893 SDValue EltLoad = DAG.getLoad(
6894 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
6895 LD->getPointerInfo(), LD->getOriginalAlign(),
6896 LD->getMemOperand()->getFlags(), LD->getAAInfo());
6897
6898 DCI.CombineTo(HiPart, EltLoad, true);
6899 ArgChains.push_back(EltLoad.getValue(1));
6900 }
6901 if (LoPart) {
6902 SDValue EltLoad = DAG.getLoad(
6903 LoPart->getValueType(0), DL, LD->getChain(),
6904 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
6905 LD->getPointerInfo().getWithOffset(8), LD->getOriginalAlign(),
6906 LD->getMemOperand()->getFlags(), LD->getAAInfo());
6907
6908 DCI.CombineTo(LoPart, EltLoad, true);
6909 ArgChains.push_back(EltLoad.getValue(1));
6910 }
6911
6912 // Collect all chains via TokenFactor.
6913 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
6914 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
6915 DCI.AddToWorklist(Chain.getNode());
6916 return SDValue(N, 0);
6917 }
6918 }
6919
6920 if (LdVT.isVector() || LdVT.isInteger())
6921 return SDValue();
6922 // Transform a scalar load that is REPLICATEd as well as having other
6923 // use(s) to the form where the other use(s) use the first element of the
6924 // REPLICATE instead of the load. Otherwise instruction selection will not
6925 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
6926 // point loads.
6927
6928 SDValue Replicate;
6929 SmallVector<SDNode*, 8> OtherUses;
6930 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6931 UI != UE; ++UI) {
6932 if (UI->getOpcode() == SystemZISD::REPLICATE) {
6933 if (Replicate)
6934 return SDValue(); // Should never happen
6935 Replicate = SDValue(*UI, 0);
6936 }
6937 else if (UI.getUse().getResNo() == 0)
6938 OtherUses.push_back(*UI);
6939 }
6940 if (!Replicate || OtherUses.empty())
6941 return SDValue();
6942
6943 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
6944 Replicate, DAG.getConstant(0, DL, MVT::i32));
6945 // Update uses of the loaded Value while preserving old chains.
6946 for (SDNode *U : OtherUses) {
6947 SmallVector<SDValue, 8> Ops;
6948 for (SDValue Op : U->ops())
6949 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
6950 DAG.UpdateNodeOperands(U, Ops);
6951 }
6952 return SDValue(N, 0);
6953}
6954
6955bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
6956 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
6957 return true;
6958 if (Subtarget.hasVectorEnhancements2())
6959 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
6960 return true;
6961 return false;
6962}
6963
6964static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
6965 if (!VT.isVector() || !VT.isSimple() ||
6966 VT.getSizeInBits() != 128 ||
6967 VT.getScalarSizeInBits() % 8 != 0)
6968 return false;
6969
6970 unsigned NumElts = VT.getVectorNumElements();
6971 for (unsigned i = 0; i < NumElts; ++i) {
6972 if (M[i] < 0) continue; // ignore UNDEF indices
6973 if ((unsigned) M[i] != NumElts - 1 - i)
6974 return false;
6975 }
6976
6977 return true;
6978}
6979
6980static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
6981 for (auto *U : StoredVal->uses()) {
6982 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
6983 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
6984 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
6985 continue;
6986 } else if (isa<BuildVectorSDNode>(U)) {
6987 SDValue BuildVector = SDValue(U, 0);
6988 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
6989 isOnlyUsedByStores(BuildVector, DAG))
6990 continue;
6991 }
6992 return false;
6993 }
6994 return true;
6995}
6996
6997static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
6998 SDValue &HiPart) {
6999 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
7000 return false;
7001
7002 SDValue Op0 = Val.getOperand(0);
7003 SDValue Op1 = Val.getOperand(1);
7004
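 // Match (or (zero_extend Lo), (shl (any_extend Hi), 64)), accepting the
 // operands in either order; the low half must be zero-extended so the OR
 // cannot disturb the high 64 bits.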
7005 if (Op0.getOpcode() == ISD::SHL)
7006 std::swap(Op0, Op1);
7007 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
7008 Op1.getOperand(1).getOpcode() != ISD::Constant ||
7009 Op1.getConstantOperandVal(1) != 64)
7010 return false;
7011 Op1 = Op1.getOperand(0);
7012
7013 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
7014 Op0.getOperand(0).getValueType() != MVT::i64)
7015 return false;
7016 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
7017 Op1.getOperand(0).getValueType() != MVT::i64)
7018 return false;
7019
7020 LoPart = Op0.getOperand(0);
7021 HiPart = Op1.getOperand(0);
7022 return true;
7023}
7024
7025static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
7026 SDValue &HiPart) {
7027 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
7028 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
7029 return false;
7030
7031 if (Val->getNumOperands() != 5 ||
7032 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
7033 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
7034 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
7035 return false;
7036
7037 LoPart = Val->getOperand(1);
7038 HiPart = Val->getOperand(3);
7039 return true;
7040}
7041
7042SDValue SystemZTargetLowering::combineSTORE(
7043 SDNode *N, DAGCombinerInfo &DCI) const {
7044 SelectionDAG &DAG = DCI.DAG;
7045 auto *SN = cast<StoreSDNode>(N);
7046 auto &Op1 = N->getOperand(1);
7047 EVT MemVT = SN->getMemoryVT();
7048 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
7049 // for the extraction to be done on a vMiN value, so that we can use VSTE.
7050 // If X has wider elements then convert it to:
7051 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
7052 if (MemVT.isInteger() && SN->isTruncatingStore()) {
7053 if (SDValue Value =
7054 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
7055 DCI.AddToWorklist(Value.getNode());
7056
7057 // Rewrite the store with the new form of stored value.
7058 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
7059 SN->getBasePtr(), SN->getMemoryVT(),
7060 SN->getMemOperand());
7061 }
7062 }
7063 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
7064 if (!SN->isTruncatingStore() &&
7065 Op1.getOpcode() == ISD::BSWAP &&
7066 Op1.getNode()->hasOneUse() &&
7067 canLoadStoreByteSwapped(Op1.getValueType())) {
7068
7069 SDValue BSwapOp = Op1.getOperand(0);
7070
7071 if (BSwapOp.getValueType() == MVT::i16)
7072 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
7073
7074 SDValue Ops[] = {
7075 N->getOperand(0), BSwapOp, N->getOperand(2)
7076 };
7077
7078 return
7079 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
7080 Ops, MemVT, SN->getMemOperand());
7081 }
7082 // Combine STORE (element-swap) into VSTER
7083 if (!SN->isTruncatingStore() &&
7084 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
7085 Op1.getNode()->hasOneUse() &&
7086 Subtarget.hasVectorEnhancements2()) {
7087 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
7088 ArrayRef<int> ShuffleMask = SVN->getMask();
7089 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
7090 SDValue Ops[] = {
7091 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
7092 };
7093
7094 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
7095 DAG.getVTList(MVT::Other),
7096 Ops, MemVT, SN->getMemOperand());
7097 }
7098 }
7099
7100 // Combine STORE (READCYCLECOUNTER) into STCKF.
7101 if (!SN->isTruncatingStore() &&
7102 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
7103 Op1.hasOneUse() &&
7104 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
7105 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
7106 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
7107 DAG.getVTList(MVT::Other),
7108 Ops, MemVT, SN->getMemOperand());
7109 }
7110
7111 // Transform a store of a 128-bit value moved from parts into two stores.
7112 if (SN->isSimple() && ISD::isNormalStore(SN)) {
7113 SDValue LoPart, HiPart;
7114 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
7115 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
7116 SDLoc DL(SN);
7117 SDValue Chain0 =
7118 DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),
7119 SN->getPointerInfo(), SN->getOriginalAlign(),
7120 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7121 SDValue Chain1 =
7122 DAG.getStore(SN->getChain(), DL, LoPart,
7123 DAG.getObjectPtrOffset(DL, SN->getBasePtr(),
7124 TypeSize::getFixed(8)),
7125 SN->getPointerInfo().getWithOffset(8),
7126 SN->getOriginalAlign(),
7127 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7128
7129 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
7130 }
7131 }
7132
7133 // Replicate a reg or immediate with VREP instead of scalar multiply or
7134 // immediate load. It seems best to do this during the first DAGCombine as
7135 // it is straightforward to handle the zero-extend node in the initial
7136 // DAG, and also not worry about keeping the new MemVT legal (e.g. when
7137 // extracting an i16 element from a v16i8 vector).
7138 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
7139 isOnlyUsedByStores(Op1, DAG)) {
7140 SDValue Word = SDValue();
7141 EVT WordVT;
7142
7143 // Find a replicated immediate; if found, return it in Word and its
7144 // type in WordVT.
7145 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
7146 // Some constants are better handled with a scalar store.
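 // (Values that fit in a signed 16-bit immediate and all-ones patterns
 // already have cheap scalar forms, e.g. store-immediate instructions, and
 // stores of at most two bytes are not worth replicating.)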
7147 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
7148 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
7149 return;
7150 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue()));
7151 if (VCI.isVectorConstantLegal(Subtarget) &&
7152 VCI.Opcode == SystemZISD::REPLICATE) {
7153 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
7154 WordVT = VCI.VecVT.getScalarType();
7155 }
7156 };
7157
7158 // Find a replicated register; if found, return it in Word and its
7159 // type in WordVT.
7160 auto FindReplicatedReg = [&](SDValue MulOp) {
7161 EVT MulVT = MulOp.getValueType();
7162 if (MulOp->getOpcode() == ISD::MUL &&
7163 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
7164 // Find a zero extended value and its type.
7165 SDValue LHS = MulOp->getOperand(0);
7166 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
7167 WordVT = LHS->getOperand(0).getValueType();
7168 else if (LHS->getOpcode() == ISD::AssertZext)
7169 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
7170 else
7171 return;
7172 // Find a replicating constant, e.g. 0x00010001.
7173 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
7174 SystemZVectorConstantInfo VCI(
7175 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
7176 if (VCI.isVectorConstantLegal(Subtarget) &&
7177 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
7178 WordVT == VCI.VecVT.getScalarType())
7179 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
7180 }
7181 }
7182 };
7183
7184 if (isa<BuildVectorSDNode>(Op1) &&
7185 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
7186 SDValue SplatVal = Op1->getOperand(0);
7187 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
7188 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
7189 else
7190 FindReplicatedReg(SplatVal);
7191 } else {
7192 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
7193 FindReplicatedImm(C, MemVT.getStoreSize());
7194 else
7195 FindReplicatedReg(Op1);
7196 }
7197
7198 if (Word != SDValue()) {
7199 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
7200 "Bad type handling");
7201 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
7202 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
7203 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
7204 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
7205 SN->getBasePtr(), SN->getMemOperand());
7206 }
7207 }
7208
7209 return SDValue();
7210}
7211
7212SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
7213 SDNode *N, DAGCombinerInfo &DCI) const {
7214 SelectionDAG &DAG = DCI.DAG;
7215 // Combine element-swap (LOAD) into VLER
7216 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7217 N->getOperand(0).hasOneUse() &&
7218 Subtarget.hasVectorEnhancements2()) {
7219 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
7220 ArrayRef<int> ShuffleMask = SVN->getMask();
7221 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
7222 SDValue Load = N->getOperand(0);
7223 LoadSDNode *LD = cast<LoadSDNode>(Load);
7224
7225 // Create the element-swapping load.
7226 SDValue Ops[] = {
7227 LD->getChain(), // Chain
7228 LD->getBasePtr() // Ptr
7229 };
7230 SDValue ESLoad =
7231 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
7232 DAG.getVTList(LD->getValueType(0), MVT::Other),
7233 Ops, LD->getMemoryVT(), LD->getMemOperand());
7234
7235 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
7236 // by the load dead.
7237 DCI.CombineTo(N, ESLoad);
7238
7239 // Next, combine the load away; we give it a bogus result value but a real
7240 // chain result. The result value is dead because the shuffle is dead.
7241 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
7242
7243 // Return N so it doesn't get rechecked!
7244 return SDValue(N, 0);
7245 }
7246 }
7247
7248 return SDValue();
7249}
7250
7251SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
7252 SDNode *N, DAGCombinerInfo &DCI) const {
7253 SelectionDAG &DAG = DCI.DAG;
7254
7255 if (!Subtarget.hasVector())
7256 return SDValue();
7257
7258 // Look through bitcasts that retain the number of vector elements.
7259 SDValue Op = N->getOperand(0);
7260 if (Op.getOpcode() == ISD::BITCAST &&
7261 Op.getValueType().isVector() &&
7262 Op.getOperand(0).getValueType().isVector() &&
7263 Op.getValueType().getVectorNumElements() ==
7264 Op.getOperand(0).getValueType().getVectorNumElements())
7265 Op = Op.getOperand(0);
7266
7267 // Pull BSWAP out of a vector extraction.
7268 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
7269 EVT VecVT = Op.getValueType();
7270 EVT EltVT = VecVT.getVectorElementType();
7271 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
7272 Op.getOperand(0), N->getOperand(1));
7273 DCI.AddToWorklist(Op.getNode());
7274 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
7275 if (EltVT != N->getValueType(0)) {
7276 DCI.AddToWorklist(Op.getNode());
7277 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
7278 }
7279 return Op;
7280 }
7281
7282 // Try to simplify a vector extraction.
7283 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
7284 SDValue Op0 = N->getOperand(0);
7285 EVT VecVT = Op0.getValueType();
7286 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
7287 IndexN->getZExtValue(), DCI, false);
7288 }
7289 return SDValue();
7290}
7291
7292SDValue SystemZTargetLowering::combineJOIN_DWORDS(
7293 SDNode *N, DAGCombinerInfo &DCI) const {
7294 SelectionDAG &DAG = DCI.DAG;
7295 // (join_dwords X, X) == (replicate X)
7296 if (N->getOperand(0) == N->getOperand(1))
7297 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
7298 N->getOperand(0));
7299 return SDValue();
7300}
7301
7302static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
7303 SDValue Chain1 = N1->getOperand(0);
7304 SDValue Chain2 = N2->getOperand(0);
7305
7306 // Trivial case: both nodes take the same chain.
7307 if (Chain1 == Chain2)
7308 return Chain1;
7309
7310 // FIXME - we could handle more complex cases via TokenFactor,
7311 // assuming we can verify that this would not create a cycle.
7312 return SDValue();
7313}
7314
7315SDValue SystemZTargetLowering::combineFP_ROUND(
7316 SDNode *N, DAGCombinerInfo &DCI) const {
7317
7318 if (!Subtarget.hasVector())
7319 return SDValue();
7320
7321 // (fpround (extract_vector_elt X 0))
7322 // (fpround (extract_vector_elt X 1)) ->
7323 // (extract_vector_elt (VROUND X) 0)
7324 // (extract_vector_elt (VROUND X) 2)
7325 //
7326 // This is a special case since the target doesn't really support v2f32s.
7327 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7328 SelectionDAG &DAG = DCI.DAG;
7329 SDValue Op0 = N->getOperand(OpNo);
7330 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
7331 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7332 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
7333 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7334 Op0.getConstantOperandVal(1) == 0) {
7335 SDValue Vec = Op0.getOperand(0);
7336 for (auto *U : Vec->uses()) {
7337 if (U != Op0.getNode() && U->hasOneUse() &&
7338 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7339 U->getOperand(0) == Vec &&
7340 U->getOperand(1).getOpcode() == ISD::Constant &&
7341 U->getConstantOperandVal(1) == 1) {
7342 SDValue OtherRound = SDValue(*U->use_begin(), 0);
7343 if (OtherRound.getOpcode() == N->getOpcode() &&
7344 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
7345 OtherRound.getValueType() == MVT::f32) {
7346 SDValue VRound, Chain;
7347 if (N->isStrictFPOpcode()) {
7348 Chain = MergeInputChains(N, OtherRound.getNode());
7349 if (!Chain)
7350 continue;
7351 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
7352 {MVT::v4f32, MVT::Other}, {Chain, Vec});
7353 Chain = VRound.getValue(1);
7354 } else
7355 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
7356 MVT::v4f32, Vec);
7357 DCI.AddToWorklist(VRound.getNode());
7358 SDValue Extract1 =
7359 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
7360 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
7361 DCI.AddToWorklist(Extract1.getNode());
7362 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
7363 if (Chain)
7364 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
7365 SDValue Extract0 =
7366 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
7367 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7368 if (Chain)
7369 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7370 N->getVTList(), Extract0, Chain);
7371 return Extract0;
7372 }
7373 }
7374 }
7375 }
7376 return SDValue();
7377}
7378
7379SDValue SystemZTargetLowering::combineFP_EXTEND(
7380 SDNode *N, DAGCombinerInfo &DCI) const {
7381
7382 if (!Subtarget.hasVector())
7383 return SDValue();
7384
7385 // (fpextend (extract_vector_elt X 0))
7386 // (fpextend (extract_vector_elt X 2)) ->
7387 // (extract_vector_elt (VEXTEND X) 0)
7388 // (extract_vector_elt (VEXTEND X) 1)
7389 //
7390 // This is a special case since the target doesn't really support v2f32s.
7391 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7392 SelectionDAG &DAG = DCI.DAG;
7393 SDValue Op0 = N->getOperand(OpNo);
7394 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
7395 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7396 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
7397 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7398 Op0.getConstantOperandVal(1) == 0) {
7399 SDValue Vec = Op0.getOperand(0);
7400 for (auto *U : Vec->uses()) {
7401 if (U != Op0.getNode() && U->hasOneUse() &&
7402 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7403 U->getOperand(0) == Vec &&
7404 U->getOperand(1).getOpcode() == ISD::Constant &&
7405 U->getConstantOperandVal(1) == 2) {
7406 SDValue OtherExtend = SDValue(*U->use_begin(), 0);
7407 if (OtherExtend.getOpcode() == N->getOpcode() &&
7408 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
7409 OtherExtend.getValueType() == MVT::f64) {
7410 SDValue VExtend, Chain;
7411 if (N->isStrictFPOpcode()) {
7412 Chain = MergeInputChains(N, OtherExtend.getNode());
7413 if (!Chain)
7414 continue;
7415 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
7416 {MVT::v2f64, MVT::Other}, {Chain, Vec});
7417 Chain = VExtend.getValue(1);
7418 } else
7419 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
7420 MVT::v2f64, Vec);
7421 DCI.AddToWorklist(VExtend.getNode());
7422 SDValue Extract1 =
7423 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
7424 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
7425 DCI.AddToWorklist(Extract1.getNode());
7426 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
7427 if (Chain)
7428 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
7429 SDValue Extract0 =
7430 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
7431 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7432 if (Chain)
7433 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7434 N->getVTList(), Extract0, Chain);
7435 return Extract0;
7436 }
7437 }
7438 }
7439 }
7440 return SDValue();
7441}
7442
7443SDValue SystemZTargetLowering::combineINT_TO_FP(
7444 SDNode *N, DAGCombinerInfo &DCI) const {
7445 if (DCI.Level != BeforeLegalizeTypes)
7446 return SDValue();
7447 SelectionDAG &DAG = DCI.DAG;
7448 LLVMContext &Ctx = *DAG.getContext();
7449 unsigned Opcode = N->getOpcode();
7450 EVT OutVT = N->getValueType(0);
7451 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
7452 SDValue Op = N->getOperand(0);
7453 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
7454 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
7455
7456 // Insert an extension before type-legalization to avoid scalarization, e.g.:
7457 // v2f64 = uint_to_fp v2i16
7458 // =>
7459 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
7460 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
7461 OutScalarBits <= 64) {
7462 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
7463 EVT ExtVT = EVT::getVectorVT(
7464 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
7465 unsigned ExtOpcode =
7466 (Opcode == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND);
7467 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
7468 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
7469 }
7470 return SDValue();
7471}
7472
7473SDValue SystemZTargetLowering::combineBSWAP(
7474 SDNode *N, DAGCombinerInfo &DCI) const {
7475 SelectionDAG &DAG = DCI.DAG;
7476 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
7477 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7478 N->getOperand(0).hasOneUse() &&
7479 canLoadStoreByteSwapped(N->getValueType(0))) {
7480 SDValue Load = N->getOperand(0);
7481 LoadSDNode *LD = cast<LoadSDNode>(Load);
7482
7483 // Create the byte-swapping load.
7484 SDValue Ops[] = {
7485 LD->getChain(), // Chain
7486 LD->getBasePtr() // Ptr
7487 };
7488 EVT LoadVT = N->getValueType(0);
7489 if (LoadVT == MVT::i16)
7490 LoadVT = MVT::i32;
7491 SDValue BSLoad =
7492 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
7493 DAG.getVTList(LoadVT, MVT::Other),
7494 Ops, LD->getMemoryVT(), LD->getMemOperand());
7495
7496 // If this is an i16 load, insert the truncate.
7497 SDValue ResVal = BSLoad;
7498 if (N->getValueType(0) == MVT::i16)
7499 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
7500
7501 // First, combine the bswap away. This makes the value produced by the
7502 // load dead.
7503 DCI.CombineTo(N, ResVal);
7504
7505 // Next, combine the load away; we give it a bogus result value but a real
7506 // chain result. The result value is dead because the bswap is dead.
7507 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
7508
7509 // Return N so it doesn't get rechecked!
7510 return SDValue(N, 0);
7511 }
7512
7513 // Look through bitcasts that retain the number of vector elements.
7514 SDValue Op = N->getOperand(0);
7515 if (Op.getOpcode() == ISD::BITCAST &&
7516 Op.getValueType().isVector() &&
7517 Op.getOperand(0).getValueType().isVector() &&
7518 Op.getValueType().getVectorNumElements() ==
7519 Op.getOperand(0).getValueType().getVectorNumElements())
7520 Op = Op.getOperand(0);
7521
7522 // Push BSWAP into a vector insertion if at least one side then simplifies.
7523 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
7524 SDValue Vec = Op.getOperand(0);
7525 SDValue Elt = Op.getOperand(1);
7526 SDValue Idx = Op.getOperand(2);
7527
7528 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
7529 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
7530 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
7531 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
7532 (canLoadStoreByteSwapped(N->getValueType(0)) &&
7533 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
7534 EVT VecVT = N->getValueType(0);
7535 EVT EltVT = N->getValueType(0).getVectorElementType();
7536 if (VecVT != Vec.getValueType()) {
7537 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
7538 DCI.AddToWorklist(Vec.getNode());
7539 }
7540 if (EltVT != Elt.getValueType()) {
7541 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
7542 DCI.AddToWorklist(Elt.getNode());
7543 }
7544 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
7545 DCI.AddToWorklist(Vec.getNode());
7546 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
7547 DCI.AddToWorklist(Elt.getNode());
7548 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
7549 Vec, Elt, Idx);
7550 }
7551 }
7552
7553 // Push BSWAP into a vector shuffle if at least one side then simplifies.
7554 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
7555 if (SV && Op.hasOneUse()) {
7556 SDValue Op0 = Op.getOperand(0);
7557 SDValue Op1 = Op.getOperand(1);
7558
7559 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
7560 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
7561 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
7562 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
7563 EVT VecVT = N->getValueType(0);
7564 if (VecVT != Op0.getValueType()) {
7565 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
7566 DCI.AddToWorklist(Op0.getNode());
7567 }
7568 if (VecVT != Op1.getValueType()) {
7569 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
7570 DCI.AddToWorklist(Op1.getNode());
7571 }
7572 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
7573 DCI.AddToWorklist(Op0.getNode());
7574 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
7575 DCI.AddToWorklist(Op1.getNode());
7576 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
7577 }
7578 }
7579
7580 return SDValue();
7581}
7582
7583static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
7584 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
7585 // set by the CCReg instruction using the CCValid / CCMask masks.
7586 // If the CCReg instruction is itself an ICMP testing the condition
7587 // code set by some other instruction, see whether we can directly
7588 // use that condition code.
7589
7590 // Verify that we have an ICMP against some constant.
7591 if (CCValid != SystemZ::CCMASK_ICMP)
7592 return false;
7593 auto *ICmp = CCReg.getNode();
7594 if (ICmp->getOpcode() != SystemZISD::ICMP)
7595 return false;
7596 auto *CompareLHS = ICmp->getOperand(0).getNode();
7597 auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
7598 if (!CompareRHS)
7599 return false;
7600
7601 // Optimize the case where CompareLHS is a SELECT_CCMASK.
7602 if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
7603 // Verify that we have an appropriate mask for an EQ or NE comparison.
7604 bool Invert = false;
7605 if (CCMask == SystemZ::CCMASK_CMP_NE)
7606 Invert = !Invert;
7607 else if (CCMask != SystemZ::CCMASK_CMP_EQ)
7608 return false;
7609
7610 // Verify that the ICMP compares against one of the select values.
7611 auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
7612 if (!TrueVal)
7613 return false;
7614 auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7615 if (!FalseVal)
7616 return false;
7617 if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
7618 Invert = !Invert;
7619 else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
7620 return false;
7621
7622 // Compute the effective CC mask for the new branch or select.
7623 auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
7624 auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
7625 if (!NewCCValid || !NewCCMask)
7626 return false;
7627 CCValid = NewCCValid->getZExtValue();
7628 CCMask = NewCCMask->getZExtValue();
7629 if (Invert)
7630 CCMask ^= CCValid;
7631
7632 // Return the updated CCReg link.
7633 CCReg = CompareLHS->getOperand(4);
7634 return true;
7635 }
7636
7637 // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
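 // (This is the sequence used to materialize CC in a GPR: IPM places CC at
 // bit SystemZ::IPM_CC, the SHL moves it to bits 30-31, and the SRA by 30
 // sign-extends it across the register.)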
7638 if (CompareLHS->getOpcode() == ISD::SRA) {
7639 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7640 if (!SRACount || SRACount->getZExtValue() != 30)
7641 return false;
7642 auto *SHL = CompareLHS->getOperand(0).getNode();
7643 if (SHL->getOpcode() != ISD::SHL)
7644 return false;
7645 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
7646 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
7647 return false;
7648 auto *IPM = SHL->getOperand(0).getNode();
7649 if (IPM->getOpcode() != SystemZISD::IPM)
7650 return false;
7651
7652 // Avoid introducing CC spills (because SRA would clobber CC).
7653 if (!CompareLHS->hasOneUse())
7654 return false;
7655 // Verify that the ICMP compares against zero.
7656 if (CompareRHS->getZExtValue() != 0)
7657 return false;
7658
7659 // Compute the effective CC mask for the new branch or select.
7660 CCMask = SystemZ::reverseCCMask(CCMask);
7661
7662 // Return the updated CCReg link.
7663 CCReg = IPM->getOperand(0);
7664 return true;
7665 }
7666
7667 return false;
7668}
7669
7670SDValue SystemZTargetLowering::combineBR_CCMASK(
7671 SDNode *N, DAGCombinerInfo &DCI) const {
7672 SelectionDAG &DAG = DCI.DAG;
7673
7674 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
7675 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7676 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7677 if (!CCValid || !CCMask)
7678 return SDValue();
7679
7680 int CCValidVal = CCValid->getZExtValue();
7681 int CCMaskVal = CCMask->getZExtValue();
7682 SDValue Chain = N->getOperand(0);
7683 SDValue CCReg = N->getOperand(4);
7684
7685 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7686 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
7687 Chain,
7688 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7689 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7690 N->getOperand(3), CCReg);
7691 return SDValue();
7692}
7693
7694SDValue SystemZTargetLowering::combineSELECT_CCMASK(
7695 SDNode *N, DAGCombinerInfo &DCI) const {
7696 SelectionDAG &DAG = DCI.DAG;
7697
7698 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
7699 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
7700 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
7701 if (!CCValid || !CCMask)
7702 return SDValue();
7703
7704 int CCValidVal = CCValid->getZExtValue();
7705 int CCMaskVal = CCMask->getZExtValue();
7706 SDValue CCReg = N->getOperand(4);
7707
7708 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7709 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
7710 N->getOperand(0), N->getOperand(1),
7711 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7712 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7713 CCReg);
7714 return SDValue();
7715}
7716
7717
7718SDValue SystemZTargetLowering::combineGET_CCMASK(
7719 SDNode *N, DAGCombinerInfo &DCI) const {
7720
7721 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
7722 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7723 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7724 if (!CCValid || !CCMask)
7725 return SDValue();
7726 int CCValidVal = CCValid->getZExtValue();
7727 int CCMaskVal = CCMask->getZExtValue();
7728
7729 SDValue Select = N->getOperand(0);
7730 if (Select->getOpcode() == ISD::TRUNCATE)
7731 Select = Select->getOperand(0);
7732 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
7733 return SDValue();
7734
7735 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
7736 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
7737 if (!SelectCCValid || !SelectCCMask)
7738 return SDValue();
7739 int SelectCCValidVal = SelectCCValid->getZExtValue();
7740 int SelectCCMaskVal = SelectCCMask->getZExtValue();
7741
7742 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
7743 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
7744 if (!TrueVal || !FalseVal)
7745 return SDValue();
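 // A (1, 0) select reproduces the tested condition directly; a (0, 1)
 // select is the inverted condition, so flip the mask. Any other constant
 // pair cannot be mapped back onto a plain CC test.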
7746 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
7747 ;
7748 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
7749 SelectCCMaskVal ^= SelectCCValidVal;
7750 else
7751 return SDValue();
7752
7753 if (SelectCCValidVal & ~CCValidVal)
7754 return SDValue();
7755 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
7756 return SDValue();
7757
7758 return Select->getOperand(4);
7759}
7760
7761SDValue SystemZTargetLowering::combineIntDIVREM(
7762 SDNode *N, DAGCombinerInfo &DCI) const {
7763 SelectionDAG &DAG = DCI.DAG;
7764 EVT VT = N->getValueType(0);
7765 // In the case where the divisor is a vector of constants a cheaper
7766 // sequence of instructions can replace the divide. BuildSDIV is called to
7767 // do this during DAG combining, but it only succeeds when it can build a
7768 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
7769 // since it is not Legal but Custom it can only happen before
7770 // legalization. Therefore we must scalarize this early, before the first
7771 // DAGCombine run. For widened vectors, this is already the result of type legalization.
7772 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
7773 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
7774 return DAG.UnrollVectorOp(N);
7775 return SDValue();
7776}
7777
7778SDValue SystemZTargetLowering::combineINTRINSIC(
7779 SDNode *N, DAGCombinerInfo &DCI) const {
7780 SelectionDAG &DAG = DCI.DAG;
7781
7782 unsigned Id = N->getConstantOperandVal(1);
7783 switch (Id) {
7784 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
7785 // or larger is simply a vector load.
7786 case Intrinsic::s390_vll:
7787 case Intrinsic::s390_vlrl:
7788 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
7789 if (C->getZExtValue() >= 15)
7790 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
7791 N->getOperand(3), MachinePointerInfo());
7792 break;
7793 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
7794 case Intrinsic::s390_vstl:
7795 case Intrinsic::s390_vstrl:
7796 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
7797 if (C->getZExtValue() >= 15)
7798 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
7799 N->getOperand(4), MachinePointerInfo());
7800 break;
7801 }
7802
7803 return SDValue();
7804}
7805
7806SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
7807 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
7808 return N->getOperand(0);
7809 return N;
7810}
7811
7812SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
7813 DAGCombinerInfo &DCI) const {
7814 switch(N->getOpcode()) {
7815 default: break;
7816 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
7817 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
7818 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
7819 case SystemZISD::MERGE_HIGH:
7820 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
7821 case ISD::LOAD: return combineLOAD(N, DCI);
7822 case ISD::STORE: return combineSTORE(N, DCI);
7823 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
7824 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
7825 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
7826 case ISD::STRICT_FP_ROUND:
7827 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
7828 case ISD::STRICT_FP_EXTEND:
7829 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
7830 case ISD::SINT_TO_FP:
7831 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
7832 case ISD::BSWAP: return combineBSWAP(N, DCI);
7833 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
7834 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
7835 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
7836 case ISD::SDIV:
7837 case ISD::UDIV:
7838 case ISD::SREM:
7839 case ISD::UREM: return combineIntDIVREM(N, DCI);
7840 case ISD::INTRINSIC_W_CHAIN:
7841 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
7842 }
7843
7844 return SDValue();
7845}
7846
7847// Return the demanded elements for the OpNo source operand of Op. DemandedElts
7848// are for Op.
7849static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
7850 unsigned OpNo) {
7851 EVT VT = Op.getValueType();
7852 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
7853 APInt SrcDemE;
7854 unsigned Opcode = Op.getOpcode();
7855 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7856 unsigned Id = Op.getConstantOperandVal(0);
7857 switch (Id) {
7858 case Intrinsic::s390_vpksh: // PACKS
7859 case Intrinsic::s390_vpksf:
7860 case Intrinsic::s390_vpksg:
7861 case Intrinsic::s390_vpkshs: // PACKS_CC
7862 case Intrinsic::s390_vpksfs:
7863 case Intrinsic::s390_vpksgs:
7864 case Intrinsic::s390_vpklsh: // PACKLS
7865 case Intrinsic::s390_vpklsf:
7866 case Intrinsic::s390_vpklsg:
7867 case Intrinsic::s390_vpklshs: // PACKLS_CC
7868 case Intrinsic::s390_vpklsfs:
7869 case Intrinsic::s390_vpklsgs:
7870 // VECTOR PACK truncates the elements of two source vectors into one.
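 // The first source supplies result elements [0, NumElts/2) and the second
 // source the rest, so take the matching half of the demanded mask.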
7871 SrcDemE = DemandedElts;
7872 if (OpNo == 2)
7873 SrcDemE.lshrInPlace(NumElts / 2);
7874 SrcDemE = SrcDemE.trunc(NumElts / 2);
7875 break;
7876 // VECTOR UNPACK extends half the elements of the source vector.
7877 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7878 case Intrinsic::s390_vuphh:
7879 case Intrinsic::s390_vuphf:
7880 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7881 case Intrinsic::s390_vuplhh:
7882 case Intrinsic::s390_vuplhf:
7883 SrcDemE = APInt(NumElts * 2, 0);
7884 SrcDemE.insertBits(DemandedElts, 0);
7885 break;
7886 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7887 case Intrinsic::s390_vuplhw:
7888 case Intrinsic::s390_vuplf:
7889 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7890 case Intrinsic::s390_vupllh:
7891 case Intrinsic::s390_vupllf:
7892 SrcDemE = APInt(NumElts * 2, 0);
7893 SrcDemE.insertBits(DemandedElts, NumElts);
7894 break;
7895 case Intrinsic::s390_vpdi: {
7896 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
7897 SrcDemE = APInt(NumElts, 0);
7898 if (!DemandedElts[OpNo - 1])
7899 break;
7900 unsigned Mask = Op.getConstantOperandVal(3);
7901 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
7902 // Demand input element 0 or 1, given by the mask bit value.
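 // (Mask bit 2, value 4, selects the doubleword taken from the first
 // operand; mask bit 0, value 1, the one taken from the second.)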
7903 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
7904 break;
7905 }
7906 case Intrinsic::s390_vsldb: {
7907 // VECTOR SHIFT LEFT DOUBLE BY BYTE
7908 assert(VT == MVT::v16i8 && "Unexpected type.");
7909 unsigned FirstIdx = Op.getConstantOperandVal(3);
7910 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
7911 unsigned NumSrc0Els = 16 - FirstIdx;
7912 SrcDemE = APInt(NumElts, 0);
7913 if (OpNo == 1) {
7914 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
7915 SrcDemE.insertBits(DemEls, FirstIdx);
7916 } else {
7917 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
7918 SrcDemE.insertBits(DemEls, 0);
7919 }
7920 break;
7921 }
7922 case Intrinsic::s390_vperm:
7923 SrcDemE = APInt(NumElts, -1);
7924 break;
7925 default:
7926 llvm_unreachable("Unhandled intrinsic.");
7927 break;
7928 }
7929 } else {
7930 switch (Opcode) {
7931 case SystemZISD::JOIN_DWORDS:
7932 // Scalar operand.
7933 SrcDemE = APInt(1, 1);
7934 break;
7935 case SystemZISD::SELECT_CCMASK:
7936 SrcDemE = DemandedElts;
7937 break;
7938 default:
7939 llvm_unreachable("Unhandled opcode.");
7940 break;
7941 }
7942 }
7943 return SrcDemE;
7944}
7945
7946static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
7947 const APInt &DemandedElts,
7948 const SelectionDAG &DAG, unsigned Depth,
7949 unsigned OpNo) {
7950 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
7951 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
7952 KnownBits LHSKnown =
7953 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
7954 KnownBits RHSKnown =
7955 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
7956 Known = LHSKnown.intersectWith(RHSKnown);
7957}
7958
7959void
7960SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
7961 KnownBits &Known,
7962 const APInt &DemandedElts,
7963 const SelectionDAG &DAG,
7964 unsigned Depth) const {
7965 Known.resetAll();
7966
7967 // Intrinsic CC result is returned in the two low bits.
7968 unsigned tmp0, tmp1; // not used
7969 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
7970 Known.Zero.setBitsFrom(2);
7971 return;
7972 }
7973 EVT VT = Op.getValueType();
7974 if (Op.getResNo() != 0 || VT == MVT::Untyped)
7975 return;
7976 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
7977 "KnownBits does not match VT in bitwidth");
7978 assert ((!VT.isVector() ||
7979 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
7980 "DemandedElts does not match VT number of elements");
7981 unsigned BitWidth = Known.getBitWidth();
7982 unsigned Opcode = Op.getOpcode();
7983 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7984 bool IsLogical = false;
7985 unsigned Id = Op.getConstantOperandVal(0);
7986 switch (Id) {
7987 case Intrinsic::s390_vpksh: // PACKS
7988 case Intrinsic::s390_vpksf:
7989 case Intrinsic::s390_vpksg:
7990 case Intrinsic::s390_vpkshs: // PACKS_CC
7991 case Intrinsic::s390_vpksfs:
7992 case Intrinsic::s390_vpksgs:
7993 case Intrinsic::s390_vpklsh: // PACKLS
7994 case Intrinsic::s390_vpklsf:
7995 case Intrinsic::s390_vpklsg:
7996 case Intrinsic::s390_vpklshs: // PACKLS_CC
7997 case Intrinsic::s390_vpklsfs:
7998 case Intrinsic::s390_vpklsgs:
7999 case Intrinsic::s390_vpdi:
8000 case Intrinsic::s390_vsldb:
8001 case Intrinsic::s390_vperm:
8002 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
8003 break;
8004 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
8005 case Intrinsic::s390_vuplhh:
8006 case Intrinsic::s390_vuplhf:
8007 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
8008 case Intrinsic::s390_vupllh:
8009 case Intrinsic::s390_vupllf:
8010 IsLogical = true;
8011 [[fallthrough]];
8012 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8013 case Intrinsic::s390_vuphh:
8014 case Intrinsic::s390_vuphf:
8015 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8016 case Intrinsic::s390_vuplhw:
8017 case Intrinsic::s390_vuplf: {
8018 SDValue SrcOp = Op.getOperand(1);
8019 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
8020 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
8021 if (IsLogical) {
8022 Known = Known.zext(BitWidth);
8023 } else
8024 Known = Known.sext(BitWidth);
8025 break;
8026 }
8027 default:
8028 break;
8029 }
8030 } else {
8031 switch (Opcode) {
8032 case SystemZISD::JOIN_DWORDS:
8033 case SystemZISD::SELECT_CCMASK:
8034 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
8035 break;
8036 case SystemZISD::REPLICATE: {
8037 SDValue SrcOp = Op.getOperand(0);
8038 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
8039 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
8040 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
8041 break;
8042 }
8043 default:
8044 break;
8045 }
8046 }
8047
8048 // Known has the width of the source operand(s). Adjust if needed to match
8049 // the passed bitwidth.
8050 if (Known.getBitWidth() != BitWidth)
8051 Known = Known.anyextOrTrunc(BitWidth);
8052}
8053
8054static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
8055 const SelectionDAG &DAG, unsigned Depth,
8056 unsigned OpNo) {
8057 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
8058 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
8059 if (LHS == 1) return 1; // Early out.
8060 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
8061 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
8062 if (RHS == 1) return 1; // Early out.
8063 unsigned Common = std::min(LHS, RHS);
8064 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
8065 EVT VT = Op.getValueType();
8066 unsigned VTBits = VT.getScalarSizeInBits();
8067 if (SrcBitWidth > VTBits) { // PACK
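 // Packing truncates each source element by SrcExtraBits, which consumes
 // that many of the known sign bits; whatever remains still holds for the
 // narrower result elements.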
8068 unsigned SrcExtraBits = SrcBitWidth - VTBits;
8069 if (Common > SrcExtraBits)
8070 return (Common - SrcExtraBits);
8071 return 1;
8072 }
8073 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
8074 return Common;
8075}
8076
8077unsigned
8078SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
8079 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
8080 unsigned Depth) const {
8081 if (Op.getResNo() != 0)
8082 return 1;
8083 unsigned Opcode = Op.getOpcode();
8084 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
8085 unsigned Id = Op.getConstantOperandVal(0);
8086 switch (Id) {
8087 case Intrinsic::s390_vpksh: // PACKS
8088 case Intrinsic::s390_vpksf:
8089 case Intrinsic::s390_vpksg:
8090 case Intrinsic::s390_vpkshs: // PACKS_CC
8091 case Intrinsic::s390_vpksfs:
8092 case Intrinsic::s390_vpksgs:
8093 case Intrinsic::s390_vpklsh: // PACKLS
8094 case Intrinsic::s390_vpklsf:
8095 case Intrinsic::s390_vpklsg:
8096 case Intrinsic::s390_vpklshs: // PACKLS_CC
8097 case Intrinsic::s390_vpklsfs:
8098 case Intrinsic::s390_vpklsgs:
8099 case Intrinsic::s390_vpdi:
8100 case Intrinsic::s390_vsldb:
8101 case Intrinsic::s390_vperm:
8102 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
8103 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8104 case Intrinsic::s390_vuphh:
8105 case Intrinsic::s390_vuphf:
8106 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8107 case Intrinsic::s390_vuplhw:
8108 case Intrinsic::s390_vuplf: {
8109 SDValue PackedOp = Op.getOperand(1);
8110 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
8111 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
8112 EVT VT = Op.getValueType();
8113 unsigned VTBits = VT.getScalarSizeInBits();
8114 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
8115 return Tmp;
8116 }
8117 default:
8118 break;
8119 }
8120 } else {
8121 switch (Opcode) {
8122 case SystemZISD::SELECT_CCMASK:
8123 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
8124 default:
8125 break;
8126 }
8127 }
8128
8129 return 1;
8130}
8131
8132bool SystemZTargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
8133 SDValue Op,
8134 const APInt &DemandedElts, const SelectionDAG &DAG,
8135 bool PoisonOnly, unsigned Depth) const {
8136 switch (Op->getOpcode()) {
8137 case SystemZISD::PCREL_WRAPPER:
8138 case SystemZISD::PCREL_OFFSET:
8139 return true;
8140 }
8141 return false;
8142}
8143
8144unsigned
8145SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
8146 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
8147 unsigned StackAlign = TFI->getStackAlignment();
8148 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
8149 "Unexpected stack alignment");
8150 // The default stack probe size is 4096 if the function has no
8151 // stack-probe-size attribute.
8152 unsigned StackProbeSize =
8153 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
8154 // Round down to the stack alignment.
8155 StackProbeSize &= ~(StackAlign - 1);
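 // If rounding down produced zero, fall back to the stack alignment so the
 // probe size is never zero.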
8156 return StackProbeSize ? StackProbeSize : StackAlign;
8157}
8158
8159//===----------------------------------------------------------------------===//
8160// Custom insertion
8161//===----------------------------------------------------------------------===//
8162
8163// Force base value Base into a register before MI. Return the register.
8164static Register forceReg(MachineInstr &MI, MachineOperand &Base,
8165 const SystemZInstrInfo *TII) {
8166 MachineBasicBlock *MBB = MI.getParent();
8167 MachineFunction &MF = *MBB->getParent();
8168 MachineRegisterInfo &MRI = MF.getRegInfo();
8169
8170 if (Base.isReg()) {
8171 // Copy Base into a new virtual register to help register coalescing in
8172 // cases with multiple uses.
8173 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8174 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
8175 .add(Base);
8176 return Reg;
8177 }
8178
8179 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8180 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
8181 .add(Base)
8182 .addImm(0)
8183 .addReg(0);
8184 return Reg;
8185}
8186
8187// The CC operand of MI might be missing a kill marker because there
8188// were multiple uses of CC, and ISel didn't know which to mark.
8189// Figure out whether MI should have had a kill marker.
8190static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
8191 // Scan forward through BB for a use/def of CC.
8192 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
8193 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
8194 const MachineInstr& mi = *miI;
8195 if (mi.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
8196 return false;
8197 if (mi.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
8198 break; // Should have kill-flag - update below.
8199 }
8200
8201 // If we hit the end of the block, check whether CC is live into a
8202 // successor.
8203 if (miI == MBB->end()) {
8204 for (const MachineBasicBlock *Succ : MBB->successors())
8205 if (Succ->isLiveIn(SystemZ::CC))
8206 return false;
8207 }
8208
8209 return true;
8210}
8211
8212// Return true if it is OK for this Select pseudo-opcode to be cascaded
8213// together with other Select pseudo-opcodes into a single basic-block with
8214// a conditional jump around it.
8215static bool isSelectPseudo(MachineInstr &MI) {
8216 switch (MI.getOpcode()) {
8217 case SystemZ::Select32:
8218 case SystemZ::Select64:
8219 case SystemZ::Select128:
8220 case SystemZ::SelectF32:
8221 case SystemZ::SelectF64:
8222 case SystemZ::SelectF128:
8223 case SystemZ::SelectVR32:
8224 case SystemZ::SelectVR64:
8225 case SystemZ::SelectVR128:
8226 return true;
8227
8228 default:
8229 return false;
8230 }
8231}
8232
8233// Helper function, which inserts PHI functions into SinkMBB:
8234// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
8235// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
8236static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
8237 MachineBasicBlock *TrueMBB,
8238 MachineBasicBlock *FalseMBB,
8239 MachineBasicBlock *SinkMBB) {
8240 MachineFunction *MF = TrueMBB->getParent();
8241 const SystemZInstrInfo *TII = MF->getSubtarget<SystemZSubtarget>().getInstrInfo();
8242
8243 MachineInstr *FirstMI = Selects.front();
8244 unsigned CCValid = FirstMI->getOperand(3).getImm();
8245 unsigned CCMask = FirstMI->getOperand(4).getImm();
8246
8247 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
8248
8249 // As we are creating the PHIs, we have to be careful if there is more than
8250 // one. Later Selects may reference the results of earlier Selects, but later
8251 // PHIs have to reference the individual true/false inputs from earlier PHIs.
8252 // That also means that PHI construction must work forward from earlier to
8253 // later, and that the code must maintain a mapping from earlier PHIs'
8254 // destination registers to the registers that went into the PHI.
8255 DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
8256
8257 for (auto *MI : Selects) {
8258 Register DestReg = MI->getOperand(0).getReg();
8259 Register TrueReg = MI->getOperand(1).getReg();
8260 Register FalseReg = MI->getOperand(2).getReg();
8261
8262 // If this Select we are generating is the opposite condition from
8263 // the jump we generated, then we have to swap the operands for the
8264 // PHI that is going to be generated.
8265 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
8266 std::swap(TrueReg, FalseReg);
8267
8268 if (RegRewriteTable.contains(TrueReg))
8269 TrueReg = RegRewriteTable[TrueReg].first;
8270
8271 if (RegRewriteTable.contains(FalseReg))
8272 FalseReg = RegRewriteTable[FalseReg].second;
8273
8274 DebugLoc DL = MI->getDebugLoc();
8275 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
8276 .addReg(TrueReg).addMBB(TrueMBB)
8277 .addReg(FalseReg).addMBB(FalseMBB);
8278
8279 // Add this PHI to the rewrite table.
8280 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
8281 }
8282
8284}
8285
8286MachineBasicBlock *
8287SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
8288 MachineBasicBlock *BB) const {
8289 MachineFunction &MF = *BB->getParent();
8290 MachineFrameInfo &MFI = MF.getFrameInfo();
8291 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
8292 assert(TFL->hasReservedCallFrame(MF) &&
8293 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
8294 (void)TFL;
8295 // Get the MaxCallFrameSize value and erase MI since it serves no further
8296 // purpose as the call frame is statically reserved in the prolog. Set
8297 // AdjustsStack as MI is *not* mapped as a frame instruction.
8298 uint32_t NumBytes = MI.getOperand(0).getImm();
8299 if (NumBytes > MFI.getMaxCallFrameSize())
8300 MFI.setMaxCallFrameSize(NumBytes);
8301 MFI.setAdjustsStack(true);
8302
8303 MI.eraseFromParent();
8304 return BB;
8305}
8306
8307// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
8308MachineBasicBlock *
8309SystemZTargetLowering::emitSelect(MachineInstr &MI,
8310 MachineBasicBlock *MBB) const {
8311 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
8312 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8313
8314 unsigned CCValid = MI.getOperand(3).getImm();
8315 unsigned CCMask = MI.getOperand(4).getImm();
8316
8317 // If we have a sequence of Select* pseudo instructions using the
8318 // same condition code value, we want to expand all of them into
8319 // a single pair of basic blocks using the same condition.
8320 SmallVector<MachineInstr*, 8> Selects;
8321 SmallVector<MachineInstr*, 8> DbgValues;
8322 Selects.push_back(&MI);
8323 unsigned Count = 0;
8324 for (MachineInstr &NextMI : llvm::make_range(
8325 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
8326 if (isSelectPseudo(NextMI)) {
8327 assert(NextMI.getOperand(3).getImm() == CCValid &&
8328 "Bad CCValid operands since CC was not redefined.");
8329 if (NextMI.getOperand(4).getImm() == CCMask ||
8330 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
8331 Selects.push_back(&NextMI);
8332 continue;
8333 }
8334 break;
8335 }
8336 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
8337 NextMI.usesCustomInsertionHook())
8338 break;
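 // A later non-debug instruction that reads one of the Select results ends
 // the group (debug values are collected and re-inserted later); the scan
 // also stops after 20 unrelated instructions.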
8339 bool User = false;
8340 for (auto *SelMI : Selects)
8341 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
8342 User = true;
8343 break;
8344 }
8345 if (NextMI.isDebugInstr()) {
8346 if (User) {
8347 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
8348 DbgValues.push_back(&NextMI);
8349 }
8350 } else if (User || ++Count > 20)
8351 break;
8352 }
8353
8354 MachineInstr *LastMI = Selects.back();
8355 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
8356 checkCCKill(*LastMI, MBB));
8357 MachineBasicBlock *StartMBB = MBB;
8358 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
8359 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8360
8361 // Unless CC was killed in the last Select instruction, mark it as
8362 // live-in to both FalseMBB and JoinMBB.
8363 if (!CCKilled) {
8364 FalseMBB->addLiveIn(SystemZ::CC);
8365 JoinMBB->addLiveIn(SystemZ::CC);
8366 }
8367
8368 // StartMBB:
8369 // BRC CCMask, JoinMBB
8370 // # fallthrough to FalseMBB
8371 MBB = StartMBB;
8372 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8373 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8374 MBB->addSuccessor(JoinMBB);
8375 MBB->addSuccessor(FalseMBB);
8376
8377 // FalseMBB:
8378 // # fallthrough to JoinMBB
8379 MBB = FalseMBB;
8380 MBB->addSuccessor(JoinMBB);
8381
8382 // JoinMBB:
8383 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
8384 // ...
8385 MBB = JoinMBB;
8386 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
8387 for (auto *SelMI : Selects)
8388 SelMI->eraseFromParent();
8389
8390 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
8391 for (auto *DbgMI : DbgValues)
8392 MBB->splice(InsertPos, StartMBB, DbgMI);
8393
8394 return JoinMBB;
8395}
8396
8397// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
8398// StoreOpcode is the store to use and Invert says whether the store should
8399// happen when the condition is false rather than true. If a STORE ON
8400// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
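// In other words, a CondStore either becomes a single STORE ON CONDITION
// (when STOCOpcode is set, the target has load/store-on-condition and no
// index register is needed) or a conditional branch around a plain store,
// as emitted below.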
8401MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
8402 MachineBasicBlock *MBB,
8403 unsigned StoreOpcode,
8404 unsigned STOCOpcode,
8405 bool Invert) const {
8406 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8407
8408 Register SrcReg = MI.getOperand(0).getReg();
8409 MachineOperand Base = MI.getOperand(1);
8410 int64_t Disp = MI.getOperand(2).getImm();
8411 Register IndexReg = MI.getOperand(3).getReg();
8412 unsigned CCValid = MI.getOperand(4).getImm();
8413 unsigned CCMask = MI.getOperand(5).getImm();
8414 DebugLoc DL = MI.getDebugLoc();
8415
8416 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
8417
8418 // ISel pattern matching also adds a load memory operand of the same
8419 // address, so take special care to find the storing memory operand.
8420 MachineMemOperand *MMO = nullptr;
8421 for (auto *I : MI.memoperands())
8422 if (I->isStore()) {
8423 MMO = I;
8424 break;
8425 }
8426
8427 // Use STOCOpcode if possible. We could use different store patterns in
8428 // order to avoid matching the index register, but the performance trade-offs
8429 // might be more complicated in that case.
8430 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
8431 if (Invert)
8432 CCMask ^= CCValid;
8433
8434 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
8435 .addReg(SrcReg)
8436 .add(Base)
8437 .addImm(Disp)
8438 .addImm(CCValid)
8439 .addImm(CCMask)
8440 .addMemOperand(MMO);
8441
8442 MI.eraseFromParent();
8443 return MBB;
8444 }
8445
8446 // Get the condition needed to branch around the store.
8447 if (!Invert)
8448 CCMask ^= CCValid;
8449
8450 MachineBasicBlock *StartMBB = MBB;
8451 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
8452 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8453
8454 // Unless CC was killed in the CondStore instruction, mark it as
8455 // live-in to both FalseMBB and JoinMBB.
8456 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
8457 !checkCCKill(MI, JoinMBB)) {
8458 FalseMBB->addLiveIn(SystemZ::CC);
8459 JoinMBB->addLiveIn(SystemZ::CC);
8460 }
8461
8462 // StartMBB:
8463 // BRC CCMask, JoinMBB
8464 // # fallthrough to FalseMBB
8465 MBB = StartMBB;
8466 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8467 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8468 MBB->addSuccessor(JoinMBB);
8469 MBB->addSuccessor(FalseMBB);
8470
8471 // FalseMBB:
8472 // store %SrcReg, %Disp(%Index,%Base)
8473 // # fallthrough to JoinMBB
8474 MBB = FalseMBB;
8475 BuildMI(MBB, DL, TII->get(StoreOpcode))
8476 .addReg(SrcReg)
8477 .add(Base)
8478 .addImm(Disp)
8479 .addReg(IndexReg)
8480 .addMemOperand(MMO);
8481 MBB->addSuccessor(JoinMBB);
8482
8483 MI.eraseFromParent();
8484 return JoinMBB;
8485}
8486
8487// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
8488MachineBasicBlock *
8489SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
8490 MachineBasicBlock *MBB,
8491 bool Unsigned) const {
8492 MachineFunction &MF = *MBB->getParent();
8493 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8494 MachineRegisterInfo &MRI = MF.getRegInfo();
8495
8496 // Synthetic instruction to compare 128-bit values.
8497 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
8498 Register Op0 = MI.getOperand(0).getReg();
8499 Register Op1 = MI.getOperand(1).getReg();
8500
8501 MachineBasicBlock *StartMBB = MBB;
8502 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
8503 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
8504
8505 // StartMBB:
8506 //
8507 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
8508 // Swap the inputs to get:
8509 // CC 1 if high(Op0) > high(Op1)
8510 // CC 2 if high(Op0) < high(Op1)
8511 // CC 0 if high(Op0) == high(Op1)
8512 //
8513 // If CC != 0, we're done, so jump over the next instruction.
8514 //
8515 // VEC[L]G Op1, Op0
8516 // JNE JoinMBB
8517 // # fallthrough to HiEqMBB
8518 MBB = StartMBB;
8519 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
8520 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
8521 .addReg(Op1).addReg(Op0);
8522 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8523 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
8524 MBB->addSuccessor(JoinMBB);
8525 MBB->addSuccessor(HiEqMBB);
8526
8527 // HiEqMBB:
8528 //
8529 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
8530 // Since we already know the high parts are equal, the CC
8531 // result will only depend on the low parts:
8532 // CC 1 if low(Op0) > low(Op1)
8533 // CC 3 if low(Op0) <= low(Op1)
8534 //
8535 // VCHLGS Tmp, Op0, Op1
8536 // # fallthrough to JoinMBB
8537 MBB = HiEqMBB;
8538 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
8539 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
8540 .addReg(Op0).addReg(Op1);
8541 MBB->addSuccessor(JoinMBB);
8542
8543 // Mark CC as live-in to JoinMBB.
8544 JoinMBB->addLiveIn(SystemZ::CC);
8545
8546 MI.eraseFromParent();
8547 return JoinMBB;
8548}
8549
8550// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
8551// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
8552// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
8553// whether the field should be inverted after performing BinOpcode (e.g. for
8554// NAND).
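// Roughly, for an 8-bit ATOMIC_LOADW_NR the loop below loads the containing
// aligned 32-bit word, rotates it so the byte sits in the high bits, applies
// NR against Src2, rotates back and retries the CS until no other update of
// the word intervened.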
8555MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
8556 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
8557 bool Invert) const {
8558 MachineFunction &MF = *MBB->getParent();
8559 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8560 MachineRegisterInfo &MRI = MF.getRegInfo();
8561
8562 // Extract the operands. Base can be a register or a frame index.
8563 // Src2 can be a register or immediate.
8564 Register Dest = MI.getOperand(0).getReg();
8565 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8566 int64_t Disp = MI.getOperand(2).getImm();
8567 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
8568 Register BitShift = MI.getOperand(4).getReg();
8569 Register NegBitShift = MI.getOperand(5).getReg();
8570 unsigned BitSize = MI.getOperand(6).getImm();
8571 DebugLoc DL = MI.getDebugLoc();
8572
8573 // Get the right opcodes for the displacement.
8574 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8575 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8576 assert(LOpcode && CSOpcode && "Displacement out of range");
8577
8578 // Create virtual registers for temporary results.
8579 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8580 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8581 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8582 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8583 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8584
8585 // Insert a basic block for the main loop.
8586 MachineBasicBlock *StartMBB = MBB;
8587 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8588 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8589
8590 // StartMBB:
8591 // ...
8592 // %OrigVal = L Disp(%Base)
8593 // # fall through to LoopMBB
8594 MBB = StartMBB;
8595 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8596 MBB->addSuccessor(LoopMBB);
8597
8598 // LoopMBB:
8599 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
8600 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8601 // %RotatedNewVal = OP %RotatedOldVal, %Src2
8602 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8603 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8604 // JNE LoopMBB
8605 // # fall through to DoneMBB
8606 MBB = LoopMBB;
8607 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8608 .addReg(OrigVal).addMBB(StartMBB)
8609 .addReg(Dest).addMBB(LoopMBB);
8610 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8611 .addReg(OldVal).addReg(BitShift).addImm(0);
8612 if (Invert) {
8613 // Perform the operation normally and then invert every bit of the field.
8614 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8615 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
8616 // XILF with the upper BitSize bits set.
8617 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
8618 .addReg(Tmp).addImm(-1U << (32 - BitSize));
8619 } else if (BinOpcode)
8620 // A simple binary operation.
8621 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
8622 .addReg(RotatedOldVal)
8623 .add(Src2);
8624 else
8625 // Use RISBG to rotate Src2 into position and use it to replace the
8626 // field in RotatedOldVal.
8627 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
8628 .addReg(RotatedOldVal).addReg(Src2.getReg())
8629 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
8630 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8631 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8632 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8633 .addReg(OldVal)
8634 .addReg(NewVal)
8635 .add(Base)
8636 .addImm(Disp);
8637 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8638 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
8639 MBB->addSuccessor(LoopMBB);
8640 MBB->addSuccessor(DoneMBB);
8641
8642 MI.eraseFromParent();
8643 return DoneMBB;
8644}
8645
8646// Implement EmitInstrWithCustomInserter for subword pseudo
8647// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
8648// instruction that should be used to compare the current field with the
8649// minimum or maximum value. KeepOldMask is the BRC condition-code mask
8650// for when the current field should be kept.
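// For example, ATOMIC_LOADW_MIN is lowered with CompareOpcode = CR and
// KeepOldMask = CCMASK_CMP_LE: the loop keeps the current field when it is
// already <= the operand and otherwise substitutes the operand via RISBG.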
8651MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
8652 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
8653 unsigned KeepOldMask) const {
8654 MachineFunction &MF = *MBB->getParent();
8655 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8656 MachineRegisterInfo &MRI = MF.getRegInfo();
8657
8658 // Extract the operands. Base can be a register or a frame index.
8659 Register Dest = MI.getOperand(0).getReg();
8660 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8661 int64_t Disp = MI.getOperand(2).getImm();
8662 Register Src2 = MI.getOperand(3).getReg();
8663 Register BitShift = MI.getOperand(4).getReg();
8664 Register NegBitShift = MI.getOperand(5).getReg();
8665 unsigned BitSize = MI.getOperand(6).getImm();
8666 DebugLoc DL = MI.getDebugLoc();
8667
8668 // Get the right opcodes for the displacement.
8669 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8670 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8671 assert(LOpcode && CSOpcode && "Displacement out of range");
8672
8673 // Create virtual registers for temporary results.
8674 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8675 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8676 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8677 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8678 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8679 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8680
8681 // Insert 3 basic blocks for the loop.
8682 MachineBasicBlock *StartMBB = MBB;
8683 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8684 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8685 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
8686 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
8687
8688 // StartMBB:
8689 // ...
8690 // %OrigVal = L Disp(%Base)
8691 // # fall through to LoopMBB
8692 MBB = StartMBB;
8693 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8694 MBB->addSuccessor(LoopMBB);
8695
8696 // LoopMBB:
8697 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
8698 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8699 // CompareOpcode %RotatedOldVal, %Src2
8700 // BRC KeepOldMask, UpdateMBB
8701 MBB = LoopMBB;
8702 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8703 .addReg(OrigVal).addMBB(StartMBB)
8704 .addReg(Dest).addMBB(UpdateMBB);
8705 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8706 .addReg(OldVal).addReg(BitShift).addImm(0);
8707 BuildMI(MBB, DL, TII->get(CompareOpcode))
8708 .addReg(RotatedOldVal).addReg(Src2);
8709 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8710 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
8711 MBB->addSuccessor(UpdateMBB);
8712 MBB->addSuccessor(UseAltMBB);
8713
8714 // UseAltMBB:
8715 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
8716 // # fall through to UpdateMBB
8717 MBB = UseAltMBB;
8718 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
8719 .addReg(RotatedOldVal).addReg(Src2)
8720 .addImm(32).addImm(31 + BitSize).addImm(0);
8721 MBB->addSuccessor(UpdateMBB);
8722
8723 // UpdateMBB:
8724 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
8725 // [ %RotatedAltVal, UseAltMBB ]
8726 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8727 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8728 // JNE LoopMBB
8729 // # fall through to DoneMBB
8730 MBB = UpdateMBB;
8731 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
8732 .addReg(RotatedOldVal).addMBB(LoopMBB)
8733 .addReg(RotatedAltVal).addMBB(UseAltMBB);
8734 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8735 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8736 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8737 .addReg(OldVal)
8738 .addReg(NewVal)
8739 .add(Base)
8740 .addImm(Disp);
8741 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8742 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
8743 MBB->addSuccessor(LoopMBB);
8744 MBB->addSuccessor(DoneMBB);
8745
8746 MI.eraseFromParent();
8747 return DoneMBB;
8748}
8749
8750// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
8751// instruction MI.
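// Note that the loop below only compares the subword field of interest, while
// the CS instruction compares the full word: if only the surrounding bytes
// changed, the CS fails and the loop retries with the freshly loaded word
// rather than reporting a spurious mismatch.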
8752MachineBasicBlock *
8753SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
8754 MachineBasicBlock *MBB) const {
8755 MachineFunction &MF = *MBB->getParent();
8756 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8757 MachineRegisterInfo &MRI = MF.getRegInfo();
8758
8759 // Extract the operands. Base can be a register or a frame index.
8760 Register Dest = MI.getOperand(0).getReg();
8761 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8762 int64_t Disp = MI.getOperand(2).getImm();
8763 Register CmpVal = MI.getOperand(3).getReg();
8764 Register OrigSwapVal = MI.getOperand(4).getReg();
8765 Register BitShift = MI.getOperand(5).getReg();
8766 Register NegBitShift = MI.getOperand(6).getReg();
8767 int64_t BitSize = MI.getOperand(7).getImm();
8768 DebugLoc DL = MI.getDebugLoc();
8769
8770 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
8771
8772 // Get the right opcodes for the displacement and zero-extension.
8773 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8774 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8775 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
8776 assert(LOpcode && CSOpcode && "Displacement out of range");
8777
8778 // Create virtual registers for temporary results.
8779 Register OrigOldVal = MRI.createVirtualRegister(RC);
8780 Register OldVal = MRI.createVirtualRegister(RC);
8781 Register SwapVal = MRI.createVirtualRegister(RC);
8782 Register StoreVal = MRI.createVirtualRegister(RC);
8783 Register OldValRot = MRI.createVirtualRegister(RC);
8784 Register RetryOldVal = MRI.createVirtualRegister(RC);
8785 Register RetrySwapVal = MRI.createVirtualRegister(RC);
8786
8787 // Insert 2 basic blocks for the loop.
8788 MachineBasicBlock *StartMBB = MBB;
8789 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8790 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8791 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
8792
8793 // StartMBB:
8794 // ...
8795 // %OrigOldVal = L Disp(%Base)
8796 // # fall through to LoopMBB
8797 MBB = StartMBB;
8798 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
8799 .add(Base)
8800 .addImm(Disp)
8801 .addReg(0);
8802 MBB->addSuccessor(LoopMBB);
8803
8804 // LoopMBB:
8805 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
8806 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
8807 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
8808 // ^^ The low BitSize bits contain the field
8809 // of interest.
8810 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
8811 // ^^ Replace the upper 32-BitSize bits of the
8812 // swap value with those that we loaded and rotated.
8813 // %Dest = LL[CH] %OldValRot
8814 // CR %Dest, %CmpVal
8815 // JNE DoneMBB
8816 // # Fall through to SetMBB
8817 MBB = LoopMBB;
8818 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8819 .addReg(OrigOldVal).addMBB(StartMBB)
8820 .addReg(RetryOldVal).addMBB(SetMBB);
8821 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
8822 .addReg(OrigSwapVal).addMBB(StartMBB)
8823 .addReg(RetrySwapVal).addMBB(SetMBB);
8824 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
8825 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
8826 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
8827 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
8828 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
8829 .addReg(OldValRot);
8830 BuildMI(MBB, DL, TII->get(SystemZ::CR))
8831 .addReg(Dest).addReg(CmpVal);
8832 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8833 .addImm(SystemZ::CCMASK_ICMP)
8834 .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
8835 MBB->addSuccessor(DoneMBB);
8836 MBB->addSuccessor(SetMBB);
8837
8838 // SetMBB:
8839 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
8840 // ^^ Rotate the new field to its proper position.
8841 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
8842 // JNE LoopMBB
8843 // # fall through to ExitMBB
8844 MBB = SetMBB;
8845 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
8846 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
8847 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
8848 .addReg(OldVal)
8849 .addReg(StoreVal)
8850 .add(Base)
8851 .addImm(Disp);
8852 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8853 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
8854 MBB->addSuccessor(LoopMBB);
8855 MBB->addSuccessor(DoneMBB);
8856
8857 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
8858 // to the block after the loop. At this point, CC may have been defined
8859 // either by the CR in LoopMBB or by the CS in SetMBB.
8860 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
8861 DoneMBB->addLiveIn(SystemZ::CC);
8862
8863 MI.eraseFromParent();
8864 return DoneMBB;
8865}
8866
8867// Emit a move from two GR64s to a GR128.
8868MachineBasicBlock *
8869SystemZTargetLowering::emitPair128(MachineInstr &MI,
8870 MachineBasicBlock *MBB) const {
8871 MachineFunction &MF = *MBB->getParent();
8872 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8873 MachineRegisterInfo &MRI = MF.getRegInfo();
8874 DebugLoc DL = MI.getDebugLoc();
8875
8876 Register Dest = MI.getOperand(0).getReg();
8877 Register Hi = MI.getOperand(1).getReg();
8878 Register Lo = MI.getOperand(2).getReg();
8879 Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8880 Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8881
8882 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1);
8883 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
8884 .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64);
8885 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8886 .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64);
8887
8888 MI.eraseFromParent();
8889 return MBB;
8890}
8891
8892// Emit an extension from a GR64 to a GR128. ClearEven is true
8893// if the high register of the GR128 value must be cleared or false if
8894// it's "don't care".
8895MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
8896 MachineBasicBlock *MBB,
8897 bool ClearEven) const {
8898 MachineFunction &MF = *MBB->getParent();
8899 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8900 MachineRegisterInfo &MRI = MF.getRegInfo();
8901 DebugLoc DL = MI.getDebugLoc();
8902
8903 Register Dest = MI.getOperand(0).getReg();
8904 Register Src = MI.getOperand(1).getReg();
8905 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8906
8907 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
8908 if (ClearEven) {
8909 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8910 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8911
8912 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
8913 .addImm(0);
8914 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
8915 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
8916 In128 = NewIn128;
8917 }
8918 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8919 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
8920
8921 MI.eraseFromParent();
8922 return MBB;
8923}
8924
8925MachineBasicBlock *
8926SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
8927 MachineBasicBlock *MBB,
8928 unsigned Opcode, bool IsMemset) const {
8929 MachineFunction &MF = *MBB->getParent();
8930 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8931 MachineRegisterInfo &MRI = MF.getRegInfo();
8932 DebugLoc DL = MI.getDebugLoc();
8933
8934 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
8935 uint64_t DestDisp = MI.getOperand(1).getImm();
8936 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
8937 uint64_t SrcDisp;
8938
8939 // Fold the displacement Disp if it is out of range.
8940 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
8941 if (!isUInt<12>(Disp)) {
8942 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8943 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
8944 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
8945 .add(Base).addImm(Disp).addReg(0);
8946 Base = MachineOperand::CreateReg(Reg, false);
8947 Disp = 0;
8948 }
8949 };
8950
8951 if (!IsMemset) {
8952 SrcBase = earlyUseOperand(MI.getOperand(2));
8953 SrcDisp = MI.getOperand(3).getImm();
8954 } else {
8955 SrcBase = DestBase;
8956 SrcDisp = DestDisp++;
8957 foldDisplIfNeeded(DestBase, DestDisp);
8958 }
8959
8960 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
8961 bool IsImmForm = LengthMO.isImm();
8962 bool IsRegForm = !IsImmForm;
8963
8964 // Build and insert one Opcode of Length, with special treatment for memset.
8965 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
8966 MachineBasicBlock::iterator InsPos,
8967 MachineOperand DBase, uint64_t DDisp,
8968 MachineOperand SBase, uint64_t SDisp,
8969 unsigned Length) -> void {
8970 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
8971 if (IsMemset) {
8972 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
8973 if (ByteMO.isImm())
8974 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
8975 .add(SBase).addImm(SDisp).add(ByteMO);
8976 else
8977 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
8978 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
8979 if (--Length == 0)
8980 return;
8981 }
8982 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
8983 .add(DBase).addImm(DDisp).addImm(Length)
8984 .add(SBase).addImm(SDisp)
8985 .setMemRefs(MI.memoperands());
8986 };
8987
8988 bool NeedsLoop = false;
8989 uint64_t ImmLength = 0;
8990 Register LenAdjReg = SystemZ::NoRegister;
8991 if (IsImmForm) {
8992 ImmLength = LengthMO.getImm();
8993 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
8994 if (ImmLength == 0) {
8995 MI.eraseFromParent();
8996 return MBB;
8997 }
8998 if (Opcode == SystemZ::CLC) {
8999 if (ImmLength > 3 * 256)
9000 // A two-CLC sequence is a clear win over a loop, not least because
9001 // it needs only one branch. A three-CLC sequence needs the same
9002 // number of branches as a loop (i.e. 2), but is shorter. That
9003 // brings us to lengths greater than 768 bytes. It seems relatively
9004 // likely that a difference will be found within the first 768 bytes,
9005 // so we just optimize for the smallest number of branch
9006 // instructions, in order to avoid polluting the prediction buffer
9007 // too much.
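 // For example, a 700-byte CLC is emitted as three straight-line CLCs
 // (256 + 256 + 188 bytes), while an 800-byte one (> 768) uses the loop.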
9008 NeedsLoop = true;
9009 } else if (ImmLength > 6 * 256)
9010 // The heuristic we use is to prefer loops for anything that would
9011 // require 7 or more MVCs. With these kinds of sizes there isn't much
9012 // to choose between straight-line code and looping code, since the
9013 // time will be dominated by the MVCs themselves.
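 // For example, a 1500-byte MVC stays as six inline MVCs (5 x 256 + 220),
 // while a 2000-byte one (> 1536) becomes a loop of 256-byte MVCs plus a
 // straight-line remainder.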
9014 NeedsLoop = true;
9015 } else {
9016 NeedsLoop = true;
9017 LenAdjReg = LengthMO.getReg();
9018 }
9019
9020 // When generating more than one CLC, all but the last will need to
9021 // branch to the end when a difference is found.
9022 MachineBasicBlock *EndMBB =
9023 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
9024 ? SystemZ::splitBlockAfter(MI, MBB)
9025 : nullptr);
9026
9027 if (NeedsLoop) {
9028 Register StartCountReg =
9029 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
9030 if (IsImmForm) {
9031 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
9032 ImmLength &= 255;
9033 } else {
9034 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
9035 .addReg(LenAdjReg)
9036 .addReg(0)
9037 .addImm(8);
9038 }
9039
9040 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
9041 auto loadZeroAddress = [&]() -> MachineOperand {
9042 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9043 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
9044 return MachineOperand::CreateReg(Reg, false);
9045 };
9046 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
9047 DestBase = loadZeroAddress();
9048 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
9049 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
9050
9051 MachineBasicBlock *StartMBB = nullptr;
9052 MachineBasicBlock *LoopMBB = nullptr;
9053 MachineBasicBlock *NextMBB = nullptr;
9054 MachineBasicBlock *DoneMBB = nullptr;
9055 MachineBasicBlock *AllDoneMBB = nullptr;
9056
9057 Register StartSrcReg = forceReg(MI, SrcBase, TII);
9058 Register StartDestReg =
9059 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
9060
9061 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
9062 Register ThisSrcReg = MRI.createVirtualRegister(RC);
9063 Register ThisDestReg =
9064 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
9065 Register NextSrcReg = MRI.createVirtualRegister(RC);
9066 Register NextDestReg =
9067 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
9068 RC = &SystemZ::GR64BitRegClass;
9069 Register ThisCountReg = MRI.createVirtualRegister(RC);
9070 Register NextCountReg = MRI.createVirtualRegister(RC);
9071
9072 if (IsRegForm) {
9073 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9074 StartMBB = SystemZ::emitBlockAfter(MBB);
9075 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9076 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9077 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
9078
9079 // MBB:
9080 // # Jump to AllDoneMBB if LenAdjReg encodes a length of 0, or fall thru to StartMBB.
9081 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9082 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
9083 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9084 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
9085 .addMBB(AllDoneMBB);
9086 MBB->addSuccessor(AllDoneMBB);
9087 if (!IsMemset)
9088 MBB->addSuccessor(StartMBB);
9089 else {
9090 // MemsetOneCheckMBB:
9091 // # Jump to MemsetOneMBB for a memset of length 1, or
9092 // # fall thru to StartMBB.
9093 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
9094 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
9095 MBB->addSuccessor(MemsetOneCheckMBB);
9096 MBB = MemsetOneCheckMBB;
9097 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9098 .addReg(LenAdjReg).addImm(-1);
9099 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9100 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
9101 .addMBB(MemsetOneMBB);
9102 MBB->addSuccessor(MemsetOneMBB, {10, 100});
9103 MBB->addSuccessor(StartMBB, {90, 100});
9104
9105 // MemsetOneMBB:
9106 // # Jump back to AllDoneMBB after a single MVI or STC.
9107 MBB = MemsetOneMBB;
9108 insertMemMemOp(MBB, MBB->end(),
9109 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
9110 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
9111 1);
9112 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
9113 MBB->addSuccessor(AllDoneMBB);
9114 }
9115
9116 // StartMBB:
9117 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
9118 MBB = StartMBB;
9119 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9120 .addReg(StartCountReg).addImm(0);
9121 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9122 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
9123 .addMBB(DoneMBB);
9124 MBB->addSuccessor(DoneMBB);
9125 MBB->addSuccessor(LoopMBB);
9126 }
9127 else {
9128 StartMBB = MBB;
9129 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9130 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9131 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9132
9133 // StartMBB:
9134 // # fall through to LoopMBB
9135 MBB->addSuccessor(LoopMBB);
9136
9137 DestBase = MachineOperand::CreateReg(NextDestReg, false);
9138 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
9139 if (EndMBB && !ImmLength)
9140 // If the loop handled the whole CLC range, DoneMBB will be empty with
9141 // CC live-through into EndMBB, so add it as live-in.
9142 DoneMBB->addLiveIn(SystemZ::CC);
9143 }
9144
9145 // LoopMBB:
9146 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
9147 // [ %NextDestReg, NextMBB ]
9148 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
9149 // [ %NextSrcReg, NextMBB ]
9150 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
9151 // [ %NextCountReg, NextMBB ]
9152 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
9153 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
9154 // ( JLH EndMBB )
9155 //
9156 // The prefetch is used only for MVC. The JLH is used only for CLC.
9157 MBB = LoopMBB;
9158 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
9159 .addReg(StartDestReg).addMBB(StartMBB)
9160 .addReg(NextDestReg).addMBB(NextMBB);
9161 if (!HaveSingleBase)
9162 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
9163 .addReg(StartSrcReg).addMBB(StartMBB)
9164 .addReg(NextSrcReg).addMBB(NextMBB);
9165 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
9166 .addReg(StartCountReg).addMBB(StartMBB)
9167 .addReg(NextCountReg).addMBB(NextMBB);
9168 if (Opcode == SystemZ::MVC)
9169 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
9170 .addImm(SystemZ::PFD_WRITE)
9171 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
9172 insertMemMemOp(MBB, MBB->end(),
9173 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
9174 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
9175 if (EndMBB) {
9176 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9177 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
9178 .addMBB(EndMBB);
9179 MBB->addSuccessor(EndMBB);
9180 MBB->addSuccessor(NextMBB);
9181 }
9182
9183 // NextMBB:
9184 // %NextDestReg = LA 256(%ThisDestReg)
9185 // %NextSrcReg = LA 256(%ThisSrcReg)
9186 // %NextCountReg = AGHI %ThisCountReg, -1
9187 // CGHI %NextCountReg, 0
9188 // JLH LoopMBB
9189 // # fall through to DoneMBB
9190 //
9191 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
9192 MBB = NextMBB;
9193 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
9194 .addReg(ThisDestReg).addImm(256).addReg(0);
9195 if (!HaveSingleBase)
9196 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
9197 .addReg(ThisSrcReg).addImm(256).addReg(0);
9198 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
9199 .addReg(ThisCountReg).addImm(-1);
9200 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9201 .addReg(NextCountReg).addImm(0);
9202 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9203 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
9204 .addMBB(LoopMBB);
9205 MBB->addSuccessor(LoopMBB);
9206 MBB->addSuccessor(DoneMBB);
9207
9208 MBB = DoneMBB;
9209 if (IsRegForm) {
9210 // DoneMBB:
9211 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
9212 // # Use EXecute Relative Long for the remainder of the bytes. The target
9213 // instruction of the EXRL will have a length field of 1 since 0 is an
9214 // illegal value. The number of bytes processed becomes (%LenAdjReg &
9215 // 0xff) + 1.
9216 // # Fall through to AllDoneMBB.
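 // For example, for a (non-memset) runtime length of 10, LenAdjReg holds 9,
 // StartCountReg = 9 >> 8 = 0 so the loop never runs, and the EXRL'd
 // instruction processes (9 & 0xff) + 1 = 10 bytes.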
9217 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9218 Register RemDestReg = HaveSingleBase ? RemSrcReg
9219 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9220 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
9221 .addReg(StartDestReg).addMBB(StartMBB)
9222 .addReg(NextDestReg).addMBB(NextMBB);
9223 if (!HaveSingleBase)
9224 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
9225 .addReg(StartSrcReg).addMBB(StartMBB)
9226 .addReg(NextSrcReg).addMBB(NextMBB);
9227 if (IsMemset)
9228 insertMemMemOp(MBB, MBB->end(),
9229 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
9230 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
9231 MachineInstrBuilder EXRL_MIB =
9232 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
9233 .addImm(Opcode)
9234 .addReg(LenAdjReg)
9235 .addReg(RemDestReg).addImm(DestDisp)
9236 .addReg(RemSrcReg).addImm(SrcDisp);
9237 MBB->addSuccessor(AllDoneMBB);
9238 MBB = AllDoneMBB;
9239 if (Opcode != SystemZ::MVC) {
9240 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
9241 if (EndMBB)
9242 MBB->addLiveIn(SystemZ::CC);
9243 }
9244 }
9246 }
9247
9248 // Handle any remaining bytes with straight-line code.
9249 while (ImmLength > 0) {
9250 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
9251 // The previous iteration might have created out-of-range displacements.
9252 // Apply them using LA/LAY if so.
9253 foldDisplIfNeeded(DestBase, DestDisp);
9254 foldDisplIfNeeded(SrcBase, SrcDisp);
9255 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
9256 DestDisp += ThisLength;
9257 SrcDisp += ThisLength;
9258 ImmLength -= ThisLength;
9259 // If there's another CLC to go, branch to the end if a difference
9260 // was found.
9261 if (EndMBB && ImmLength > 0) {
9262 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
9263 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9264 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
9265 .addMBB(EndMBB);
9266 MBB->addSuccessor(EndMBB);
9267 MBB->addSuccessor(NextMBB);
9268 MBB = NextMBB;
9269 }
9270 }
9271 if (EndMBB) {
9272 MBB->addSuccessor(EndMBB);
9273 MBB = EndMBB;
9274 MBB->addLiveIn(SystemZ::CC);
9275 }
9276
9277 MI.eraseFromParent();
9278 return MBB;
9279}
9280
9281// Decompose string pseudo-instruction MI into a loop that continually performs
9282// Opcode until CC != 3.
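// For example, CLSTLoop becomes a loop around CLST: CC 3 means the
// instruction stopped after a CPU-determined number of bytes and must be
// resumed with the updated addresses, so the loop repeats until CC < 3.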
9283MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
9284 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9285 MachineFunction &MF = *MBB->getParent();
9286 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9287 MachineRegisterInfo &MRI = MF.getRegInfo();
9288 DebugLoc DL = MI.getDebugLoc();
9289
9290 uint64_t End1Reg = MI.getOperand(0).getReg();
9291 uint64_t Start1Reg = MI.getOperand(1).getReg();
9292 uint64_t Start2Reg = MI.getOperand(2).getReg();
9293 uint64_t CharReg = MI.getOperand(3).getReg();
9294
9295 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
9296 uint64_t This1Reg = MRI.createVirtualRegister(RC);
9297 uint64_t This2Reg = MRI.createVirtualRegister(RC);
9298 uint64_t End2Reg = MRI.createVirtualRegister(RC);
9299
9300 MachineBasicBlock *StartMBB = MBB;
9301 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9302 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9303
9304 // StartMBB:
9305 // # fall through to LoopMBB
9306 MBB->addSuccessor(LoopMBB);
9307
9308 // LoopMBB:
9309 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
9310 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
9311 // R0L = %CharReg
9312 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
9313 // JO LoopMBB
9314 // # fall through to DoneMBB
9315 //
9316 // The load of R0L can be hoisted by post-RA LICM.
9317 MBB = LoopMBB;
9318
9319 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
9320 .addReg(Start1Reg).addMBB(StartMBB)
9321 .addReg(End1Reg).addMBB(LoopMBB);
9322 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
9323 .addReg(Start2Reg).addMBB(StartMBB)
9324 .addReg(End2Reg).addMBB(LoopMBB);
9325 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
9326 BuildMI(MBB, DL, TII->get(Opcode))
9327 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
9328 .addReg(This1Reg).addReg(This2Reg);
9329 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9330 .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
9331 MBB->addSuccessor(LoopMBB);
9332 MBB->addSuccessor(DoneMBB);
9333
9334 DoneMBB->addLiveIn(SystemZ::CC);
9335
9336 MI.eraseFromParent();
9337 return DoneMBB;
9338}
9339
9340// Update TBEGIN instruction with final opcode and register clobbers.
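// Each even/odd GPR pair is covered by one bit of the general register save
// mask (GRSM); GPRControlBit[I] below maps register number I to the bit for
// its pair, and any pair the GRSM does not save is modelled as clobbered.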
9341MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
9342 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
9343 bool NoFloat) const {
9344 MachineFunction &MF = *MBB->getParent();
9345 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9346 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9347
9348 // Update opcode.
9349 MI.setDesc(TII->get(Opcode));
9350
9351 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
9352 // Make sure to add the corresponding GRSM bits if they are missing.
9353 uint64_t Control = MI.getOperand(2).getImm();
9354 static const unsigned GPRControlBit[16] = {
9355 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
9356 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
9357 };
9358 Control |= GPRControlBit[15];
9359 if (TFI->hasFP(MF))
9360 Control |= GPRControlBit[11];
9361 MI.getOperand(2).setImm(Control);
9362
9363 // Add GPR clobbers.
9364 for (int I = 0; I < 16; I++) {
9365 if ((Control & GPRControlBit[I]) == 0) {
9366 unsigned Reg = SystemZMC::GR64Regs[I];
9367 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9368 }
9369 }
9370
9371 // Add FPR/VR clobbers.
9372 if (!NoFloat && (Control & 4) != 0) {
9373 if (Subtarget.hasVector()) {
9374 for (unsigned Reg : SystemZMC::VR128Regs) {
9375 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9376 }
9377 } else {
9378 for (unsigned Reg : SystemZMC::FP64Regs) {
9379 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9380 }
9381 }
9382 }
9383
9384 return MBB;
9385}
9386
9387MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
9388 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9389 MachineFunction &MF = *MBB->getParent();
9390 MachineRegisterInfo *MRI = &MF.getRegInfo();
9391 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9392 DebugLoc DL = MI.getDebugLoc();
9393
9394 Register SrcReg = MI.getOperand(0).getReg();
9395
9396 // Create new virtual register of the same class as source.
9397 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
9398 Register DstReg = MRI->createVirtualRegister(RC);
9399
9400 // Replace pseudo with a normal load-and-test that models the def as
9401 // well.
9402 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
9403 .addReg(SrcReg)
9404 .setMIFlags(MI.getFlags());
9405 MI.eraseFromParent();
9406
9407 return MBB;
9408}
9409
9410MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
9411 MachineInstr &MI, MachineBasicBlock *MBB) const {
9412 MachineFunction &MF = *MBB->getParent();
9413 MachineRegisterInfo *MRI = &MF.getRegInfo();
9414 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9415 DebugLoc DL = MI.getDebugLoc();
9416 const unsigned ProbeSize = getStackProbeSize(MF);
9417 Register DstReg = MI.getOperand(0).getReg();
9418 Register SizeReg = MI.getOperand(2).getReg();
9419
9420 MachineBasicBlock *StartMBB = MBB;
9421 MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
9422 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
9423 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
9424 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
9425 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
9426
9429
9430 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9431 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9432
9433 // LoopTestMBB
9434 // BRC TailTestMBB
9435 // # fallthrough to LoopBodyMBB
9436 StartMBB->addSuccessor(LoopTestMBB);
9437 MBB = LoopTestMBB;
9438 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
9439 .addReg(SizeReg)
9440 .addMBB(StartMBB)
9441 .addReg(IncReg)
9442 .addMBB(LoopBodyMBB);
9443 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
9444 .addReg(PHIReg)
9445 .addImm(ProbeSize);
9446 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9447 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
9448 .addMBB(TailTestMBB);
9449 MBB->addSuccessor(LoopBodyMBB);
9450 MBB->addSuccessor(TailTestMBB);
9451
9452 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
9453 // J LoopTestMBB
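 // The CG below exists only to touch the newly allocated region (at offset
 // ProbeSize - 8 from the updated %r15); its comparison result is unused, and
 // the volatile memory operand keeps the probing load from being deleted.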
9454 MBB = LoopBodyMBB;
9455 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
9456 .addReg(PHIReg)
9457 .addImm(ProbeSize);
9458 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
9459 .addReg(SystemZ::R15D)
9460 .addImm(ProbeSize);
9461 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9462 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
9463 .setMemRefs(VolLdMMO);
9464 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
9465 MBB->addSuccessor(LoopTestMBB);
9466
9467 // TailTestMBB
9468 // BRC DoneMBB
9469 // # fallthrough to TailMBB
9470 MBB = TailTestMBB;
9471 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9472 .addReg(PHIReg)
9473 .addImm(0);
9474 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9475 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
9476 .addMBB(DoneMBB);
9477 MBB->addSuccessor(TailMBB);
9478 MBB->addSuccessor(DoneMBB);
9479
9480 // TailMBB
9481 // # fallthrough to DoneMBB
9482 MBB = TailMBB;
9483 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
9484 .addReg(SystemZ::R15D)
9485 .addReg(PHIReg);
9486 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9487 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
9488 .setMemRefs(VolLdMMO);
9489 MBB->addSuccessor(DoneMBB);
9490
9491 // DoneMBB
9492 MBB = DoneMBB;
9493 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
9494 .addReg(SystemZ::R15D);
9495
9496 MI.eraseFromParent();
9497 return DoneMBB;
9498}
9499
9500SDValue SystemZTargetLowering::
9501getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
9502 MachineFunction &MF = DAG.getMachineFunction();
9503 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
9504 SDLoc DL(SP);
9505 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
9506 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
9507}
9508
9509MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
9510 MachineInstr &MI, MachineBasicBlock *MBB) const {
9511 switch (MI.getOpcode()) {
9512 case SystemZ::ADJCALLSTACKDOWN:
9513 case SystemZ::ADJCALLSTACKUP:
9514 return emitAdjCallStack(MI, MBB);
9515
9516 case SystemZ::Select32:
9517 case SystemZ::Select64:
9518 case SystemZ::Select128:
9519 case SystemZ::SelectF32:
9520 case SystemZ::SelectF64:
9521 case SystemZ::SelectF128:
9522 case SystemZ::SelectVR32:
9523 case SystemZ::SelectVR64:
9524 case SystemZ::SelectVR128:
9525 return emitSelect(MI, MBB);
9526
9527 case SystemZ::CondStore8Mux:
9528 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
9529 case SystemZ::CondStore8MuxInv:
9530 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
9531 case SystemZ::CondStore16Mux:
9532 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
9533 case SystemZ::CondStore16MuxInv:
9534 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
9535 case SystemZ::CondStore32Mux:
9536 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
9537 case SystemZ::CondStore32MuxInv:
9538 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
9539 case SystemZ::CondStore8:
9540 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
9541 case SystemZ::CondStore8Inv:
9542 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
9543 case SystemZ::CondStore16:
9544 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
9545 case SystemZ::CondStore16Inv:
9546 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
9547 case SystemZ::CondStore32:
9548 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
9549 case SystemZ::CondStore32Inv:
9550 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
9551 case SystemZ::CondStore64:
9552 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
9553 case SystemZ::CondStore64Inv:
9554 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
9555 case SystemZ::CondStoreF32:
9556 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
9557 case SystemZ::CondStoreF32Inv:
9558 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
9559 case SystemZ::CondStoreF64:
9560 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
9561 case SystemZ::CondStoreF64Inv:
9562 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
9563
9564 case SystemZ::SCmp128Hi:
9565 return emitICmp128Hi(MI, MBB, false);
9566 case SystemZ::UCmp128Hi:
9567 return emitICmp128Hi(MI, MBB, true);
9568
9569 case SystemZ::PAIR128:
9570 return emitPair128(MI, MBB);
9571 case SystemZ::AEXT128:
9572 return emitExt128(MI, MBB, false);
9573 case SystemZ::ZEXT128:
9574 return emitExt128(MI, MBB, true);
9575
9576 case SystemZ::ATOMIC_SWAPW:
9577 return emitAtomicLoadBinary(MI, MBB, 0);
9578
9579 case SystemZ::ATOMIC_LOADW_AR:
9580 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
9581 case SystemZ::ATOMIC_LOADW_AFI:
9582 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
9583
9584 case SystemZ::ATOMIC_LOADW_SR:
9585 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
9586
9587 case SystemZ::ATOMIC_LOADW_NR:
9588 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
9589 case SystemZ::ATOMIC_LOADW_NILH:
9590 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
9591
9592 case SystemZ::ATOMIC_LOADW_OR:
9593 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
9594 case SystemZ::ATOMIC_LOADW_OILH:
9595 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
9596
9597 case SystemZ::ATOMIC_LOADW_XR:
9598 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
9599 case SystemZ::ATOMIC_LOADW_XILF:
9600 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
9601
9602 case SystemZ::ATOMIC_LOADW_NRi:
9603 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
9604 case SystemZ::ATOMIC_LOADW_NILHi:
9605 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
9606
9607 case SystemZ::ATOMIC_LOADW_MIN:
9608 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
9609 case SystemZ::ATOMIC_LOADW_MAX:
9610 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
9611 case SystemZ::ATOMIC_LOADW_UMIN:
9612 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
9613 case SystemZ::ATOMIC_LOADW_UMAX:
9614 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
9615
9616 case SystemZ::ATOMIC_CMP_SWAPW:
9617 return emitAtomicCmpSwapW(MI, MBB);
9618 case SystemZ::MVCImm:
9619 case SystemZ::MVCReg:
9620 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
9621 case SystemZ::NCImm:
9622 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
9623 case SystemZ::OCImm:
9624 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
9625 case SystemZ::XCImm:
9626 case SystemZ::XCReg:
9627 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
9628 case SystemZ::CLCImm:
9629 case SystemZ::CLCReg:
9630 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
9631 case SystemZ::MemsetImmImm:
9632 case SystemZ::MemsetImmReg:
9633 case SystemZ::MemsetRegImm:
9634 case SystemZ::MemsetRegReg:
9635 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
9636 case SystemZ::CLSTLoop:
9637 return emitStringWrapper(MI, MBB, SystemZ::CLST);
9638 case SystemZ::MVSTLoop:
9639 return emitStringWrapper(MI, MBB, SystemZ::MVST);
9640 case SystemZ::SRSTLoop:
9641 return emitStringWrapper(MI, MBB, SystemZ::SRST);
9642 case SystemZ::TBEGIN:
9643 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
9644 case SystemZ::TBEGIN_nofloat:
9645 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
9646 case SystemZ::TBEGINC:
9647 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
9648 case SystemZ::LTEBRCompare_Pseudo:
9649 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
9650 case SystemZ::LTDBRCompare_Pseudo:
9651 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
9652 case SystemZ::LTXBRCompare_Pseudo:
9653 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
9654
9655 case SystemZ::PROBED_ALLOCA:
9656 return emitProbedAlloca(MI, MBB);
9657
9658 case TargetOpcode::STACKMAP:
9659 case TargetOpcode::PATCHPOINT:
9660 return emitPatchPoint(MI, MBB);
9661
9662 default:
9663 llvm_unreachable("Unexpected instr type to insert");
9664 }
9665}
9666
9667// This is only used by the isel schedulers, and is needed only to prevent
9668// the compiler from crashing when list-ilp is used.
9669const TargetRegisterClass *
9670SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
9671 if (VT == MVT::Untyped)
9672 return &SystemZ::ADDR128BitRegClass;
9673 return TargetLowering::getRepRegClassFor(VT);
9674}
9675
9676SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
9677 SelectionDAG &DAG) const {
9678 SDLoc dl(Op);
9679 /*
9680 The rounding method is in FPC Byte 3 bits 6-7, and has the following
9681 settings:
9682 00 Round to nearest
9683 01 Round to 0
9684 10 Round to +inf
9685 11 Round to -inf
9686
9687 FLT_ROUNDS, on the other hand, expects the following:
9688 -1 Undefined
9689 0 Round to 0
9690 1 Round to nearest
9691 2 Round to +inf
9692 3 Round to -inf
9693 */
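// The arithmetic below implements that mapping: with CWD1 = FPC & 3,
// (CWD1 ^ (CWD1 >> 1)) ^ 1 yields 0 -> 1 (nearest), 1 -> 0 (toward zero),
// 2 -> 2 (+inf) and 3 -> 3 (-inf), matching the FLT_ROUNDS values above.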
9694
9695 // Save FPC to register.
9696 SDValue Chain = Op.getOperand(0);
9697 SDValue EFPC(
9698 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
9699 Chain = EFPC.getValue(1);
9700
9701 // Transform as necessary
9702 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
9703 DAG.getConstant(3, dl, MVT::i32));
9704 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
9705 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
9706 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
9707 DAG.getConstant(1, dl, MVT::i32)));
9708
9709 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
9710 DAG.getConstant(1, dl, MVT::i32));
9711 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
9712
9713 return DAG.getMergeValues({RetVal, Chain}, dl);
9714}
9715
9716SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
9717 SelectionDAG &DAG) const {
9718 EVT VT = Op.getValueType();
9719 Op = Op.getOperand(0);
9720 EVT OpVT = Op.getValueType();
9721
9722 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
9723
9724 SDLoc DL(Op);
9725
9726 // load a 0 vector for the third operand of VSUM.
9727 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
9728
9729 // execute VSUM.
9730 switch (OpVT.getScalarSizeInBits()) {
9731 case 8:
9732 case 16:
9733 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
9734 [[fallthrough]];
9735 case 32:
9736 case 64:
9737 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
9738 DAG.getBitcast(Op.getValueType(), Zero));
9739 break;
9740 case 128:
9741 break; // VSUM over v1i128 should not happen and would be a noop
9742 default:
9743 llvm_unreachable("Unexpected scalar size.");
9744 }
9745 // Cast to original vector type, retrieve last element.
9746 return DAG.getNode(
9747 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
9748 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
9749}
Definition: APInt.h:1308
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition: APInt.h:300
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
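As a quick illustration of the APInt operations listed above (zext, trunc, setBit, lshrInPlace, getZExtValue), here is a small self-contained sketch; the values are arbitrary and not taken from this file.

#include "llvm/ADT/APInt.h"
using namespace llvm;

void apintSketch() {
  APInt Mask(32, 0xFF00);            // 32-bit value with bits 8..15 set
  APInt Wide = Mask.zext(64);        // zero-extend to 64 bits
  Wide.setBit(63);                   // set the most significant bit
  Wide.lshrInPlace(8);               // logical shift right, in place
  APInt Low = Wide.trunc(16);        // truncate back to 16 bits
  uint64_t Raw = Low.getZExtValue(); // read the (now small) value back out
  (void)Raw;
}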
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
@ Add
*p = old + v
Definition: Instructions.h:764
@ Or
*p = old | v
Definition: Instructions.h:772
@ Sub
*p = old - v
Definition: Instructions.h:766
@ And
*p = old & v
Definition: Instructions.h:768
@ Xor
*p = old ^ v
Definition: Instructions.h:774
BinOp getOperation() const
Definition: Instructions.h:845
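The AtomicRMWInst::BinOp values above are normally dispatched on via getOperation(); the following is a hedged, generic sketch of that pattern (the ISD mapping shown is illustrative, not a statement about this target's lowering).

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static unsigned atomicISDOpcode(const AtomicRMWInst &RMW) {
  switch (RMW.getOperation()) {
  case AtomicRMWInst::Add: return ISD::ATOMIC_LOAD_ADD; // *p = old + v
  case AtomicRMWInst::Sub: return ISD::ATOMIC_LOAD_SUB; // *p = old - v
  case AtomicRMWInst::And: return ISD::ATOMIC_LOAD_AND; // *p = old & v
  case AtomicRMWInst::Or:  return ISD::ATOMIC_LOAD_OR;  // *p = old | v
  case AtomicRMWInst::Xor: return ISD::ATOMIC_LOAD_XOR; // *p = old ^ v
  default:                 return ISD::ATOMIC_SWAP;     // fallback for the remaining operations
  }
}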
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:349
The address of a basic block.
Definition: Constants.h:889
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
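The CCState/CCValAssign pair above drives argument lowering. A minimal sketch of the usual pattern follows; CallConv, IsVarArg, MF, Ins, DAG and the TableGen-generated assignment function (written here as CC_SystemZ) are assumed to be in scope inside a LowerFormalArguments-style hook and are not quoted from this file.

SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);  // fills ArgLocs with one location per argument
for (CCValAssign &VA : ArgLocs) {
  if (VA.isRegLoc()) {
    // Argument arrives in the physical register VA.getLocReg().
  } else {
    assert(VA.isMemLoc() && "argument is either in a register or on the stack");
    // Argument lives at VA.getLocMemOffset() bytes from the incoming stack pointer.
  }
}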
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:703
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:715
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:264
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:677
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:563
bool hasPrivateLinkage() const
Definition: GlobalValue.h:527
bool hasInternalLinkage() const
Definition: GlobalValue.h:526
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:184
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
unsigned getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setMaxCallFrameSize(unsigned S)
MachineFunctionProperties & reset(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
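The MachineInstrBuilder methods above (addReg, addImm, addMBB) are normally used through BuildMI. The lines below are a hedged sketch of that pattern inside a custom inserter; TII, MBB, MI, DL and TargetMBB are assumed to be in scope, and the BRC operand order (CC-valid mask, CC mask, target block) is taken as given.

BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP)    // which CC values the producer can set
    .addImm(SystemZ::CCMASK_CMP_EQ)  // branch when the comparison was "equal"
    .addMBB(TargetMBB);              // destination block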
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getMachineOpcode() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:732
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:739
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
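Most of the lowering code documented on this page builds nodes with the SelectionDAG calls listed above. A small illustrative helper, not taken from this file, showing getNode and getConstant:

static SDValue emitMaskedAdd(SelectionDAG &DAG, const SDLoc &DL, SDValue A, SDValue B) {
  EVT VT = A.getValueType();
  SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, A, B);  // A + B
  SDValue Mask = DAG.getConstant(0xFF, DL, VT);       // constant 0xFF of the same type
  return DAG.getNode(ISD::AND, DL, VT, Sum, Mask);    // (A + B) & 0xFF
}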
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void reserve(size_type N)
Definition: SmallVector.h:676
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:462
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:257
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:676
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
iterator end() const
Definition: StringRef.h:113
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
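StringRef and StringSwitch, listed above, are the usual tools for parsing inline-asm constraint strings. A self-contained sketch with made-up constraint letters and return codes:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
using namespace llvm;

static int classifyConstraint(StringRef C) {
  if (C.starts_with("{"))
    return 2;         // explicit register name such as "{r5}"
  return StringSwitch<int>(C)
      .Case("r", 0)   // general-purpose register class
      .Case("f", 1)   // floating-point register class
      .Default(-1);   // unknown constraint
}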
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
A SystemZ-specific class detailing special use registers particular for calling conventions.
A SystemZ-specific constant pool value.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const
const TargetFrameLowering * getFrameLowering() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
const SystemZRegisterInfo * getRegisterInfo() const override
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
bool useSoftFloat() const override
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers Particular to z/OS when in 64 bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
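The setOperationAction/setTruncStoreAction/setLoadExtAction entries above are the knobs a target constructor uses to steer legalization. The following lines are a generic, hedged sketch of how such calls look inside a TargetLowering-derived constructor; the particular operations and actions are illustrative only, not this file's actual configuration.

setOperationAction(ISD::BR_JT, MVT::Other, Custom);         // route jump tables through LowerOperation
setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);       // let the legalizer expand it
setTruncStoreAction(MVT::f64, MVT::f32, Expand);            // no single truncating f64->f32 store
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); // extend via an explicit FP_EXTEND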
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:199
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:751
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:237
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1133
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1129
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:724
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:477
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:251
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1276
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:715
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1162
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1278
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1248
@ STRICT_FCEIL
Definition: ISDOpcodes.h:427
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1279
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:240
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1038
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:784
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:484
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
Definition: ISDOpcodes.h:1261
@ STRICT_FMINIMUM
Definition: ISDOpcodes.h:437
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:791
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:391
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:689
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1235
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1240
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:821
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:256
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:478
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1274
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:904
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:230
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1275
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:412
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1407
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:775
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:451
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:621
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1228
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:995
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:931
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1084
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:328
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1277
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1063
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:508
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:728
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1244
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:212
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:223
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:628
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1158
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:324
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:431
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1371
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:881
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:652
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:706
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:601
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1272
@ STRICT_FMAXIMUM
Definition: ISDOpcodes.h:436
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:425
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:536
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:781
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:426
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:743
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1280
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:972
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1048
@ ConstantPool
Definition: ISDOpcodes.h:82
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:810
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:799
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:889
@ STRICT_FROUND
Definition: ISDOpcodes.h:429
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:737
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:304
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:450
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:428
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1270
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:444
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:466
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:443
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:991
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1271
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:837
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1189
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:471
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:681
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1215
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:401
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:525
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1269
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:870
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:832
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:424
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:787
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1077
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:764
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:494
@ AssertZext
Definition: ISDOpcodes.h:62
@ STRICT_FRINT
Definition: ISDOpcodes.h:423
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1327
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:516
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1530
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1510
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
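A short illustration of the ISD condition-code helpers above; the types and condition are arbitrary.

ISD::CondCode CC = ISD::SETLT;
ISD::CondCode Inverse = ISD::getSetCCInverse(CC, MVT::i64);  // !(x < y)  ==>  SETGE
ISD::CondCode Swapped = ISD::getSetCCSwappedOperands(CC);    // (y op x)  ==>  SETGT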
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition: SystemZ.h:40
static bool isImmHH(uint64_t Val)
Definition: SystemZ.h:176
const unsigned CCMASK_TEND
Definition: SystemZ.h:97
const unsigned CCMASK_CS_EQ
Definition: SystemZ.h:67
const unsigned CCMASK_TBEGIN
Definition: SystemZ.h:92
const unsigned CCMASK_0
Definition: SystemZ.h:27
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition: SystemZ.h:82
const unsigned CCMASK_LOGICAL_CARRY
Definition: SystemZ.h:60
const unsigned TDCMASK_NORMAL_MINUS
Definition: SystemZ.h:122
const unsigned CCMASK_TDC
Definition: SystemZ.h:109
const unsigned CCMASK_FCMP
Definition: SystemZ.h:48
const unsigned CCMASK_TM_SOME_0
Definition: SystemZ.h:81
static bool isImmHL(uint64_t Val)
Definition: SystemZ.h:171
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition: SystemZ.h:124
const unsigned PFD_READ
Definition: SystemZ.h:115
const unsigned CCMASK_1
Definition: SystemZ.h:28
const unsigned TDCMASK_NORMAL_PLUS
Definition: SystemZ.h:121
const unsigned PFD_WRITE
Definition: SystemZ.h:116
const unsigned CCMASK_CMP_GT
Definition: SystemZ.h:37
const unsigned TDCMASK_QNAN_MINUS
Definition: SystemZ.h:128
const unsigned CCMASK_CS
Definition: SystemZ.h:69
const unsigned CCMASK_ANY
Definition: SystemZ.h:31
const unsigned CCMASK_ARITH
Definition: SystemZ.h:55
const unsigned CCMASK_TM_MIXED_MSB_0
Definition: SystemZ.h:78
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition: SystemZ.h:123
static bool isImmLL(uint64_t Val)
Definition: SystemZ.h:161
const unsigned VectorBits
Definition: SystemZ.h:154
static bool isImmLH(uint64_t Val)
Definition: SystemZ.h:166
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition: SystemZ.h:125
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition: SystemZ.h:77
const unsigned IPM_CC
Definition: SystemZ.h:112
const unsigned CCMASK_CMP_LE
Definition: SystemZ.h:39
const unsigned CCMASK_CMP_O
Definition: SystemZ.h:44
const unsigned CCMASK_CMP_EQ
Definition: SystemZ.h:35
const unsigned VectorBytes
Definition: SystemZ.h:158
const unsigned TDCMASK_INFINITY_MINUS
Definition: SystemZ.h:126
const unsigned CCMASK_ICMP
Definition: SystemZ.h:47
const unsigned CCMASK_VCMP_ALL
Definition: SystemZ.h:101
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition: SystemZ.h:104
const unsigned CCMASK_TM_MIXED_MSB_1
Definition: SystemZ.h:79
const unsigned CCMASK_TM_MSB_0
Definition: SystemZ.h:83
const unsigned CCMASK_ARITH_OVERFLOW
Definition: SystemZ.h:54
const unsigned CCMASK_CS_NE
Definition: SystemZ.h:68
const unsigned TDCMASK_SNAN_PLUS
Definition: SystemZ.h:129
const unsigned CCMASK_TM
Definition: SystemZ.h:85
const unsigned CCMASK_3
Definition: SystemZ.h:30
const unsigned CCMASK_CMP_LT
Definition: SystemZ.h:36
const unsigned CCMASK_CMP_NE
Definition: SystemZ.h:38
const unsigned TDCMASK_ZERO_PLUS
Definition: SystemZ.h:119
const unsigned TDCMASK_QNAN_PLUS
Definition: SystemZ.h:127
const unsigned TDCMASK_ZERO_MINUS
Definition: SystemZ.h:120
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition: SystemZ.h:80
const unsigned CCMASK_LOGICAL_BORROW
Definition: SystemZ.h:62
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition: SystemZ.h:43
const unsigned CCMASK_LOGICAL
Definition: SystemZ.h:64
const unsigned CCMASK_TM_MSB_1
Definition: SystemZ.h:84
const unsigned TDCMASK_SNAN_MINUS
Definition: SystemZ.h:130
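The CCMASK_* and TDCMASK_* constants above are bit masks: each CCMASK_n names one of the four z/Architecture condition-code values, and the composite masks are unions of those bits. A minimal sketch, assuming the relationships defined in the target-internal header SystemZ.h (so this only compiles inside the SystemZ backend):
// Sketch only: composite CC masks are unions of the per-CC bits.
#include "SystemZ.h"
#include <cassert>
using namespace llvm;

static void ccMaskExample() {
  // "Less or equal" is the union of "equal" and "less than".
  assert(SystemZ::CCMASK_CMP_LE ==
         (SystemZ::CCMASK_CMP_EQ | SystemZ::CCMASK_CMP_LT));
  // Integer comparisons never produce the "unordered" CC value.
  assert((SystemZ::CCMASK_ICMP & SystemZ::CCMASK_CMP_UO) == 0);
}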
@ GeneralDynamic
Definition: CodeGen.h:46
@ GS
Definition: X86.h:205
Reg
All possible values of the reg field in the ModR/M byte.
support::ulittle32_t Word
Definition: IRSymtab.h:52
NodeAddr< CodeNode * > Code
Definition: RDFGraph.h:388
constexpr const char32_t SBase
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:337
@ Offset
Definition: DWP.cpp:456
@ Length
Definition: DWP.cpp:456
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
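A minimal sketch of the usual BuildMI pattern for emitting a machine instruction into a block; the opcode and registers are placeholders supplied by the caller, and emitCopyExample is an illustrative name.
// Sketch only: inserting one instruction before iterator MI in MBB.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
using namespace llvm;

static void emitCopyExample(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, const DebugLoc &DL,
                            const TargetInstrInfo *TII, unsigned Opcode,
                            Register Dst, Register Src) {
  BuildMI(MBB, MI, DL, TII->get(Opcode), Dst)
      .addReg(Src, getKillRegState(true));
}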
@ Done
Definition: Threading.h:61
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert 'Bytes' to a hex string and output to 'OS'.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
int countl_zero(T Val)
Count the number of 0s from the most significant bit to the least significant, stopping at the first 1.
Definition: bit.h:281
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
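A few sanity checks on the bit-manipulation helpers listed above (llvm::countr_zero, llvm::countl_zero, llvm::bit_ceil, llvm::isPowerOf2_32, llvm::Log2_32_Ceil); a minimal sketch, not code from this file.
// Sketch only: expected results of the integer bit helpers.
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

static void bitHelpersExample() {
  assert(llvm::countr_zero(0x50u) == 4);   // trailing zeros of 0b0101'0000
  assert(llvm::countl_zero(0x50u) == 25);  // leading zeros in a 32-bit value
  assert(llvm::bit_ceil(17u) == 32u);      // smallest power of two >= 17
  assert(llvm::isPowerOf2_32(32));         // 32 is a power of two > 0
  assert(llvm::Log2_32_Ceil(17) == 5);     // ceil(log2(17)) == 5
}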
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ Mul
Product of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:465
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
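The remaining two integer helpers above, SignExtend64 and bit_floor, behave as follows; again a minimal sketch rather than code from this file.
// Sketch only: sign extension from a narrow field and the power-of-two floor.
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

static void moreBitHelpersExample() {
  assert(llvm::SignExtend64<16>(0xFFFF) == -1);  // 16-bit all-ones is -1
  assert(llvm::bit_floor(17u) == 16u);           // largest power of two <= 17
}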
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define EQ(a, b)
Definition: regexec.c:112
#define NC
Definition: regutils.h:42
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
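A minimal sketch of how the EVT queries listed above compose for a 128-bit SIMD type; evtExample is an illustrative name and the caller supplies the LLVMContext.
// Sketch only: basic EVT construction and queries for v4f32.
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>
using namespace llvm;

static void evtExample(LLVMContext &Ctx) {
  EVT VT = EVT::getVectorVT(Ctx, MVT::f32, 4);   // v4f32
  assert(VT.isVector() && VT.isFloatingPoint());
  assert(VT.getVectorNumElements() == 4);
  assert(VT.getScalarSizeInBits() == 32);
  assert(VT.getFixedSizeInBits() == 128 && VT.isRound());
  // Same shape, but with integer elements: v4i32.
  EVT IntVT = VT.changeVectorElementTypeToInteger();
  assert(IntVT.getVectorElementType() == MVT::i32);
}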
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:182
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:141
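A minimal sketch of the KnownBits queries listed above; knownBitsExample is an illustrative name.
// Sketch only: tracking known-zero bits and extending the result.
#include "llvm/Support/KnownBits.h"
#include <cassert>
using namespace llvm;

static void knownBitsExample() {
  KnownBits Known(8);          // an 8-bit value, nothing known yet
  Known.Zero.setHighBits(4);   // the top four bits are known to be zero
  assert(Known.getBitWidth() == 8);
  assert(Known.getMaxValue() == 15);     // so the value is at most 0b1111
  KnownBits Wide = Known.zext(16);       // zero extension preserves that fact
  assert(Wide.getBitWidth() == 16 && Wide.getMaxValue() == 15);
  Known.resetAll();                      // forget everything again
}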
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
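A minimal sketch of the MachinePointerInfo factories above, which backends use to tag the memory operands of the loads and stores they create; the helper names are illustrative.
// Sketch only: pointer info for a frame index and for the constant pool.
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;

static MachinePointerInfo spillSlotInfoExample(MachineFunction &MF, int FI) {
  // Refers to stack slot FI at offset 0.
  return MachinePointerInfo::getFixedStack(MF, FI);
}

static MachinePointerInfo constantPoolInfoExample(MachineFunction &MF) {
  return MachinePointerInfo::getConstantPool(MF);
}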
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
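The CallLoweringInfo setters above return the object itself, so they chain. A minimal sketch of that pattern as used when a lowering routine emits a libcall-style call through TargetLowering::LowerCallTo; emitCallExample is an illustrative name.
// Sketch only: building a CallLoweringInfo with chained setters.
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Type.h"
using namespace llvm;

static std::pair<SDValue, SDValue>
emitCallExample(const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &DL,
                SDValue Chain, SDValue Callee, Type *RetTy,
                TargetLowering::ArgListTy &&Args) {
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setCallee(CallingConv::C, RetTy, Callee, std::move(Args))
      .setDiscardResult(RetTy->isVoidTy());
  return TLI.LowerCallTo(CLI);   // returns {result value, output chain}
}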