//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SystemZTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include <cctype>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "systemz-lower"

// Temporarily let this be disabled by default until all known problems
// related to argument extensions are fixed.
static cl::opt<bool> EnableIntArgExtCheck(
    "argext-abi-check", cl::init(false),
    cl::desc("Verify that narrow int args are properly extended per the "
             "SystemZ ABI."));

namespace {
// Represents information about a comparison.
struct Comparison {
  Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
      : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
        Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}

  // The operands to the comparison.
  SDValue Op0, Op1;

  // Chain if this is a strict floating-point comparison.
  SDValue Chain;

  // The opcode that should be used to compare Op0 and Op1.
  unsigned Opcode;

  // A SystemZICMP value.  Only used for integer comparisons.
  unsigned ICmpType;

  // The mask of CC values that Opcode can produce.
  unsigned CCValid;

  // The mask of CC values for which the original condition is true.
  unsigned CCMask;
};
} // end anonymous namespace
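
// Illustrative sketch (not from the upstream file): an integer equality
// test would typically be described here with Opcode = SystemZISD::ICMP,
// CCValid covering the CC values an integer compare can produce, and
// CCMask selecting just the CC value that means "equal", so later code can
// emit one compare followed by a branch or conditional operation on that
// mask.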

// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i32:
    return true;
  case MVT::i64:
    return false;
  default:
    llvm_unreachable("Unsupported type");
  }
}

// Return a version of MachineOperand that can be safely used before the
// final use.
static MachineOperand earlyUseOperand(MachineOperand Op) {
  if (Op.isReg())
    Op.setIsKill(false);
  return Op;
}

SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
                                             const SystemZSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));

  auto *Regs = STI.getSpecialRegisters();

  // Set up the register classes.
  if (Subtarget.hasHighWord())
    addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
  else
    addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
  if (!useSoftFloat()) {
    if (Subtarget.hasVector()) {
      addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
      addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
    } else {
      addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
      addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
    }
    if (Subtarget.hasVectorEnhancements1())
      addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
    else
      addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);

    if (Subtarget.hasVector()) {
      addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
    }

    if (Subtarget.hasVector())
      addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
  }

  // Compute derived properties from the register classes
  computeRegisterProperties(Subtarget.getRegisterInfo());

  // Set up special registers.
  setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());

  // TODO: It may be better to default to latency-oriented scheduling, however
  // LLVM's current latency-oriented scheduler can't handle physreg definitions
  // such as SystemZ has with CC, so set this to the register-pressure
  // scheduler, because it can.
  setSchedulingPreference(Sched::RegPressure);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(128);

  // Instructions are strings of 2-byte aligned 2-byte values.
  setMinFunctionAlignment(Align(2));
  // For performance reasons we prefer 16-byte alignment.
  setPrefFunctionAlignment(Align(16));

  // Handle operations that are handled in a similar way for all types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Lower SET_CC into an IPM-based sequence.
      setOperationAction(ISD::SETCC, VT, Custom);
      setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
      setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);

      // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC, VT, Custom);
    }
  }

  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // Handle integer types except i128.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT) && VT != MVT::i128) {
      setOperationAction(ISD::ABS, VT, Legal);

      // Expand individual DIV and REMs into DIVREMs.
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);

      // Support addition/subtraction with overflow.
      setOperationAction(ISD::SADDO, VT, Custom);
      setOperationAction(ISD::SSUBO, VT, Custom);

      // Support addition/subtraction with carry.
      setOperationAction(ISD::UADDO, VT, Custom);
      setOperationAction(ISD::USUBO, VT, Custom);

      // Support carry in as value rather than glue.
      setOperationAction(ISD::UADDO_CARRY, VT, Custom);
      setOperationAction(ISD::USUBO_CARRY, VT, Custom);

      // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
      // available, or if the operand is constant.
      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);

      // Use POPCNT on z196 and above.
      if (Subtarget.hasPopulationCount())
        setOperationAction(ISD::CTPOP, VT, Custom);
      else
        setOperationAction(ISD::CTPOP, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Use *MUL_LOHI where possible instead of MULH*.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Custom);
      setOperationAction(ISD::UMUL_LOHI, VT, Custom);

      // Only z196 and above have native support for conversions to unsigned.
      // On z10, promoting to i64 doesn't generate an inexact condition for
      // values that are outside the i32 range but in the i64 range, so use
      // the default expansion.
      if (!Subtarget.hasFPExtension())
        setOperationAction(ISD::FP_TO_UINT, VT, Expand);

      // Mirror those settings for STRICT_FP_TO_[SU]INT.  Note that these all
      // default to Expand, so need to be modified to Legal where appropriate.
      setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
      if (Subtarget.hasFPExtension())
        setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);

      // And similarly for STRICT_[SU]INT_TO_FP.
      setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal);
      if (Subtarget.hasFPExtension())
        setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal);
    }
  }

  // Handle i128 if legal.
  if (isTypeLegal(MVT::i128)) {
    // No special instructions for these.
    setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i128, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i128, Expand);
    setOperationAction(ISD::ROTR, MVT::i128, Expand);
    setOperationAction(ISD::ROTL, MVT::i128, Expand);
    setOperationAction(ISD::MUL, MVT::i128, Expand);
    setOperationAction(ISD::MULHS, MVT::i128, Expand);
    setOperationAction(ISD::MULHU, MVT::i128, Expand);
    setOperationAction(ISD::SDIV, MVT::i128, Expand);
    setOperationAction(ISD::UDIV, MVT::i128, Expand);
    setOperationAction(ISD::SREM, MVT::i128, Expand);
    setOperationAction(ISD::UREM, MVT::i128, Expand);
    setOperationAction(ISD::CTLZ, MVT::i128, Expand);
    setOperationAction(ISD::CTTZ, MVT::i128, Expand);

    // Support addition/subtraction with carry.
    setOperationAction(ISD::UADDO, MVT::i128, Custom);
    setOperationAction(ISD::USUBO, MVT::i128, Custom);
    setOperationAction(ISD::UADDO_CARRY, MVT::i128, Custom);
    setOperationAction(ISD::USUBO_CARRY, MVT::i128, Custom);

    // Use VPOPCT and add up partial results.
    setOperationAction(ISD::CTPOP, MVT::i128, Custom);

    // We have to use libcalls for these.
    setOperationAction(ISD::FP_TO_UINT, MVT::i128, LibCall);
    setOperationAction(ISD::FP_TO_SINT, MVT::i128, LibCall);
    setOperationAction(ISD::UINT_TO_FP, MVT::i128, LibCall);
    setOperationAction(ISD::SINT_TO_FP, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, LibCall);
  }

  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);

  // Whether or not i128 is a legal type, we need to custom lower
  // the atomic operations in order to exploit SystemZ instructions.
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
  setOperationAction(ISD::ATOMIC_LOAD, MVT::f128, Custom);
  setOperationAction(ISD::ATOMIC_STORE, MVT::f128, Custom);

  // Mark sign/zero extending atomic loads as legal, which will make
  // DAGCombiner fold extensions into atomic loads if possible.
  setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i64,
                         {MVT::i8, MVT::i16, MVT::i32}, Legal);
  setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
                         {MVT::i8, MVT::i16}, Legal);
  setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i16,
                         MVT::i8, Legal);

  // We can use the CC result of compare-and-swap to implement
  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Traps are legal, as we will convert them to "j .+2".
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // z10 has instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
  if (!Subtarget.hasFPExtension()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
  }

  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);

  // On z15 we have native support for a 64-bit CTPOP.
  if (Subtarget.hasMiscellaneousExtensions3()) {
    setOperationAction(ISD::CTPOP, MVT::i32, Promote);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  }

  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // Expand 128 bit shifts without using a libcall.
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);

  // Also expand 256 bit shifts if i128 is a legal type.
  if (isTypeLegal(MVT::i128)) {
    setOperationAction(ISD::SRL_PARTS, MVT::i128, Expand);
    setOperationAction(ISD::SHL_PARTS, MVT::i128, Expand);
    setOperationAction(ISD::SRA_PARTS, MVT::i128, Expand);
  }

  // Handle bitcast from fp128 to i128.
  if (!isTypeLegal(MVT::i128))
    setOperationAction(ISD::BITCAST, MVT::i128, Custom);

  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
  }

  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::JumpTable, PtrVT, Custom);

  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

  // Handle prefetches with PFD or PFDRL.
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  // Handle readcyclecounter with STCKF.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::Other, Custom);

  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
    // Assume by default that all vector operations need to be expanded.
    for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
      if (getOperationAction(Opcode, VT) == Legal)
        setOperationAction(Opcode, VT, Expand);

    // Likewise all truncating stores and extending loads.
    for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
    }

    if (isTypeLegal(VT)) {
      // These operations are legal for anything that can be stored in a
      // vector register, even if there is no native support for the format
      // as such.  In particular, we can do these for v4f32 even though there
      // are no specific instructions for that format.
      setOperationAction(ISD::LOAD, VT, Legal);
      setOperationAction(ISD::STORE, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      // Likewise, except that we need to replace the nodes with something
      // more specific.
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
  }

  // Handle integer vector types.
  for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
    if (isTypeLegal(VT)) {
      // These operations have direct equivalents.
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);
      if (VT != MVT::v2i64)
        setOperationAction(ISD::MUL, VT, Legal);
      setOperationAction(ISD::ABS, VT, Legal);
      setOperationAction(ISD::AND, VT, Legal);
      setOperationAction(ISD::OR, VT, Legal);
      setOperationAction(ISD::XOR, VT, Legal);
      if (Subtarget.hasVectorEnhancements1())
        setOperationAction(ISD::CTPOP, VT, Legal);
      else
        setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::CTTZ, VT, Legal);
      setOperationAction(ISD::CTLZ, VT, Legal);

      // Convert a GPR scalar to a vector by inserting it into element 0.
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

      // Use a series of unpacks for extensions.
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);

      // Detect shifts/rotates by a scalar amount and convert them into
      // V*_BY_SCALAR.
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
      setOperationAction(ISD::SRL, VT, Custom);
      setOperationAction(ISD::ROTL, VT, Custom);

      // Add ISD::VECREDUCE_ADD as custom in order to implement
      // it with VZERO+VSUM
      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);

      // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
      // and inverting the result as necessary.
      setOperationAction(ISD::SETCC, VT, Custom);
    }
  }

  if (Subtarget.hasVector()) {
    // There should be no need to check for float types other than v2f64
    // since <2 x f32> isn't a legal type.
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal);
  }

  if (Subtarget.hasVectorEnhancements2()) {
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal);
  }

  // Handle floating-point types.
  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // We can use the extended form of FI for other rounding operations.
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::FNEARBYINT, VT, Legal);
        setOperationAction(ISD::FFLOOR, VT, Legal);
        setOperationAction(ISD::FCEIL, VT, Legal);
        setOperationAction(ISD::FTRUNC, VT, Legal);
        setOperationAction(ISD::FROUND, VT, Legal);
      }

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);

      // Special treatment.
      setOperationAction(ISD::IS_FPCLASS, VT, Custom);

      // Handle constrained floating-point operations.
      setOperationAction(ISD::STRICT_FADD, VT, Legal);
      setOperationAction(ISD::STRICT_FSUB, VT, Legal);
      setOperationAction(ISD::STRICT_FMUL, VT, Legal);
      setOperationAction(ISD::STRICT_FDIV, VT, Legal);
      setOperationAction(ISD::STRICT_FMA, VT, Legal);
      setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
      setOperationAction(ISD::STRICT_FRINT, VT, Legal);
      setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
      setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
        setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
        setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
        setOperationAction(ISD::STRICT_FROUND, VT, Legal);
        setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
      }
    }
  }

  // Handle floating-point vector types.
  if (Subtarget.hasVector()) {
    // Scalar-to-vector conversion is just a subreg.
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

    // Some insertions and extractions can be done directly but others
    // need to go via integers.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    // These operations have direct equivalents.
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FABS, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

    // Handle constrained floating-point operations.
    setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);

    setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
    setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
    setOperationAction(ISD::STRICT_FSETCC, MVT::v2f64, Custom);
    setOperationAction(ISD::STRICT_FSETCC, MVT::v4f32, Custom);
    if (Subtarget.hasVectorEnhancements1()) {
      setOperationAction(ISD::STRICT_FSETCCS, MVT::v2f64, Custom);
      setOperationAction(ISD::STRICT_FSETCCS, MVT::v4f32, Custom);
    }
  }

  // The vector enhancements facility 1 has instructions for these.
  if (Subtarget.hasVectorEnhancements1()) {
    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FABS, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);

    // Handle constrained floating-point operations.
    setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
    for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
                     MVT::v4f32, MVT::v2f64 }) {
      setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal);
    }
  }

  // We only have fused f128 multiply-addition on vector registers.
  if (!Subtarget.hasVectorEnhancements1()) {
    setOperationAction(ISD::FMA, MVT::f128, Expand);
    setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
  }

  // We don't have a copysign instruction on vector registers.
  if (Subtarget.hasVectorEnhancements1())
    setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);

  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of a f80 constant (in cases where the constant
  // would fit in an f80).
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);

  // We don't have extending load instruction on vector registers.
  if (Subtarget.hasVectorEnhancements1()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
  }

  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);

  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  if (!Subtarget.hasVector()) {
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::f32, Custom);
  }

  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  if (Subtarget.isTargetzOS()) {
    // Handle address space casts between mixed sized pointers.
    setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
    setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
  }

  setOperationAction(ISD::GET_ROUNDING, MVT::Other, Custom);

  // Codes for which we want to perform some z-specific combinations.
  setTargetDAGCombine({ISD::ZERO_EXTEND,
                       ISD::SIGN_EXTEND,
                       ISD::SIGN_EXTEND_INREG,
                       ISD::LOAD,
                       ISD::STORE,
                       ISD::VECTOR_SHUFFLE,
                       ISD::EXTRACT_VECTOR_ELT,
                       ISD::FP_ROUND,
                       ISD::STRICT_FP_ROUND,
                       ISD::FP_EXTEND,
                       ISD::SINT_TO_FP,
                       ISD::UINT_TO_FP,
                       ISD::STRICT_FP_EXTEND,
                       ISD::BSWAP,
                       ISD::SDIV,
                       ISD::UDIV,
                       ISD::SREM,
                       ISD::UREM,
                       ISD::INTRINSIC_VOID,
                       ISD::INTRINSIC_W_CHAIN});

  // Handle intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We're not using SJLJ for exception handling, but they're implemented
  // solely to support use of __builtin_setjmp / __builtin_longjmp.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to use MVC in preference to even a single load/store pair.
  MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
  MaxStoresPerMemcpyOptSize = 0;

  // The main memset sequence is a byte store followed by an MVC.
  // Two STC or MV..I stores win over that, but the kind of fused stores
  // generated by target-independent code don't when the byte value is
  // variable.  E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
  // than "STC;MVC".  Handle the choice in target-specific code instead.
  MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
  MaxStoresPerMemsetOptSize = 0;

  // Default to having -disable-strictnode-mutation on.
  IsStrictFPEnabled = true;

  if (Subtarget.isTargetzOS()) {
    struct RTLibCallMapping {
      RTLIB::Libcall Code;
      const char *Name;
    };
    static RTLibCallMapping RTLibCallCommon[] = {
#define HANDLE_LIBCALL(code, name) {RTLIB::code, name},
#include "ZOSLibcallNames.def"
    };
    for (auto &E : RTLibCallCommon)
      setLibcallName(E.Code, E.Name);
  }
}
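
// Worked example (a sketch, not from the upstream file): given IR like
//   %cmp = icmp slt i64 %a, %b
//   %res = select i1 %cmp, i64 %x, i64 %y
// the actions configured above first Expand the SELECT into
// SELECT_CC(%a, %b, %x, %y, setlt), and the Custom SELECT_CC/BR_CC hooks
// then emit a CC-setting compare (e.g. CGR) followed by a conditional
// sequence keyed on the CC mask for "less than".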

bool SystemZTargetLowering::useSoftFloat() const {
  return Subtarget.hasSoftFloat();
}

EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
                                              LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
    const MachineFunction &MF, EVT VT) const {
  if (useSoftFloat())
    return false;

  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return Subtarget.hasVectorEnhancements1();
  default:
    break;
  }

  return false;
}

// Return true if the constant can be generated with a vector instruction,
// such as VGM, VGMB or VREPI.
bool SystemZVectorConstantInfo::isVectorConstantLegal(
    const SystemZSubtarget &Subtarget) {
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  if (!Subtarget.hasVector() ||
      (isFP128 && !Subtarget.hasVectorEnhancements1()))
    return false;

  // Try using VECTOR GENERATE BYTE MASK.  This is the architecturally-
  // preferred way of creating all-zero and all-one vectors so give it
  // priority over other methods below.
  unsigned Mask = 0;
  unsigned I = 0;
  for (; I < SystemZ::VectorBytes; ++I) {
    uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
    if (Byte == 0xff)
      Mask |= 1ULL << I;
    else if (Byte != 0)
      break;
  }
  if (I == SystemZ::VectorBytes) {
    Opcode = SystemZISD::BYTE_MASK;
    OpVals.push_back(Mask);
    VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
    return true;
  }

  if (SplatBitSize > 64)
    return false;

  auto tryValue = [&](uint64_t Value) -> bool {
    // Try VECTOR REPLICATE IMMEDIATE
    int64_t SignedValue = SignExtend64(Value, SplatBitSize);
    if (isInt<16>(SignedValue)) {
      OpVals.push_back(((unsigned) SignedValue));
      Opcode = SystemZISD::REPLICATE;
      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
                               SystemZ::VectorBits / SplatBitSize);
      return true;
    }
    // Try VECTOR GENERATE MASK
    unsigned Start, End;
    if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
      // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
      // denoting 1 << 63 and 63 denoting 1.  Convert them to bit numbers for
      // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
      OpVals.push_back(Start - (64 - SplatBitSize));
      OpVals.push_back(End - (64 - SplatBitSize));
      Opcode = SystemZISD::ROTATE_MASK;
      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
                               SystemZ::VectorBits / SplatBitSize);
      return true;
    }
    return false;
  };

  // First try assuming that any undefined bits above the highest set bit
  // and below the lowest set bit are 1s.  This increases the likelihood of
  // being able to use a sign-extended element value in VECTOR REPLICATE
  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
  uint64_t SplatBitsZ = SplatBits.getZExtValue();
  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
  unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
  unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
  uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
  uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
  if (tryValue(SplatBitsZ | Upper | Lower))
    return true;

  // Now try assuming that any undefined bits between the first and
  // last defined set bits are set.  This increases the chances of
  // using a non-wraparound mask.
  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
  return tryValue(SplatBitsZ | Middle);
}
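
// Illustration (a sketch, not from the upstream file): a v8i16 splat of
// 0xffff is all-ones in every byte, so the BYTE_MASK (VGBM) path above
// matches it, while a v8i16 splat of 0x0001 sign-extends to a 16-bit
// immediate and is matched by the REPLICATE (VREPI) path instead.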

SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) {
  if (IntImm.isSingleWord()) {
    IntBits = APInt(128, IntImm.getZExtValue());
    IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
  } else
    IntBits = IntImm;
  assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");

  // Find the smallest splat.
  SplatBits = IntImm;
  unsigned Width = SplatBits.getBitWidth();
  while (Width > 8) {
    unsigned HalfSize = Width / 2;
    APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
    APInt LowValue = SplatBits.trunc(HalfSize);

    // If the two halves do not match, stop here.
    if (HighValue != LowValue || 8 > HalfSize)
      break;

    SplatBits = HighValue;
    Width = HalfSize;
  }
  SplatUndef = 0;
  SplatBitSize = Width;
}
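
// Example (a sketch, not from the upstream file): a 128-bit immediate whose
// bytes are all 0x01 splits into two matching 64-bit halves, then matching
// 32-, 16- and 8-bit halves, so the loop above reduces it to
// SplatBits = 0x01 with SplatBitSize = 8.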

SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
  assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
  bool HasAnyUndefs;

  // Get IntBits by finding the 128 bit splat.
  BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
                       true);

  // Get SplatBits by finding the 8 bit or greater splat.
  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
                       true);
}

bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                         bool ForCodeSize) const {
  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
  if (Imm.isZero() || Imm.isNegZero())
    return true;

  return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
}

MachineBasicBlock *
SystemZTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const SystemZRegisterInfo *TRI = Subtarget.getRegisterInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = ++MBB->getIterator();

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  (void)TRI;
  Register mainDstReg = MRI.createVirtualRegister(RC);
  Register restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate.
  // Algorithm:
  //
  //             ---------
  //            | thisMBB |
  //             ---------
  //                 |
  //      ------------------------
  //     |                        |
  //  ----------          ---------------
  // | mainMBB  |        |  restoreMBB   |
  // |  v = 0   |        |    v = 1      |
  //  ----------          ---------------
  //     |                        |
  //      ------------------------
  //                 |
  //  -----------------------------
  // |           sinkMBB           |
  // | phi(v_mainMBB,v_restoreMBB) |
  //  -----------------------------
  // thisMBB:
  //  buf[FPOffset] = Frame Pointer if hasFP.
  //  buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB.
  //  buf[BCOffset] = Backchain value if building with -mbackchain.
  //  buf[SPOffset] = Stack Pointer.
  //  buf[LPOffset] = We never write this slot with R13; gcc always stores
  //                  R13 here.
  //  SjLjSetup restoreMBB
  // mainMBB:
  //  v_main = 0
  // sinkMBB:
  //  v = phi(v_main, v_restore)
  // restoreMBB:
  //  v_restore = 1

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);

  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);
  MF->push_back(restoreMBB);
  restoreMBB->setMachineBlockAddressTaken();

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // thisMBB:
  const int64_t FPOffset = 0;                         // Slot 1.
  const int64_t LabelOffset = 1 * PVT.getStoreSize(); // Slot 2.
  const int64_t BCOffset = 2 * PVT.getStoreSize();    // Slot 3.
  const int64_t SPOffset = 3 * PVT.getStoreSize();    // Slot 4.

  // Buf address.
  Register BufReg = MI.getOperand(1).getReg();

  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);

  // Prepare IP for longjmp.
  BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::LARL), LabelReg)
      .addMBB(restoreMBB);
  // Store IP for return from jmp, slot 2, offset = 1.
  BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::STG))
      .addReg(LabelReg)
      .addReg(BufReg)
      .addImm(LabelOffset)
      .addReg(0);

  auto *SpecialRegs = Subtarget.getSpecialRegisters();
  bool HasFP = Subtarget.getFrameLowering()->hasFP(*MF);
  if (HasFP) {
    BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::STG))
        .addReg(SpecialRegs->getFramePointerRegister())
        .addReg(BufReg)
        .addImm(FPOffset)
        .addReg(0);
  }

  // Store SP.
  BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::STG))
      .addReg(SpecialRegs->getStackPointerRegister())
      .addReg(BufReg)
      .addImm(SPOffset)
      .addReg(0);

  // Slot 3 (Offset = 2): Backchain value (if building with -mbackchain).
  bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
  if (BackChain) {
    Register BCReg = MRI.createVirtualRegister(PtrRC);
    auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::LG), BCReg)
              .addReg(SpecialRegs->getStackPointerRegister())
              .addImm(TFL->getBackchainOffset(*MF))
              .addReg(0);

    BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::STG))
        .addReg(BCReg)
        .addReg(BufReg)
        .addImm(BCOffset)
        .addReg(0);
  }

  // Setup.
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::EH_SjLj_Setup))
            .addMBB(restoreMBB);

  const SystemZRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  MIB.addRegMask(RegInfo->getNoPreservedMask());

  thisMBB->addSuccessor(mainMBB);
  thisMBB->addSuccessor(restoreMBB);

  // mainMBB:
  BuildMI(mainMBB, DL, TII->get(SystemZ::LHI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(SystemZ::PHI), DstReg)
      .addReg(mainDstReg)
      .addMBB(mainMBB)
      .addReg(restoreDstReg)
      .addMBB(restoreMBB);

  // restoreMBB.
  BuildMI(restoreMBB, DL, TII->get(SystemZ::LHI), restoreDstReg).addImm(1);
  BuildMI(restoreMBB, DL, TII->get(SystemZ::J)).addMBB(sinkMBB);
  restoreMBB->addSuccessor(sinkMBB);

  MI.eraseFromParent();

  return sinkMBB;
}
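
// At the source level (a sketch, not from the upstream file) this supports:
//   if (__builtin_setjmp(buf) == 0) { /* falls through mainMBB, v = 0 */ }
//   else                            { /* re-entered via restoreMBB, v = 1 */ }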

MachineBasicBlock *
SystemZTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const {

  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
  Register BufReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(BufReg);
  auto *SpecialRegs = Subtarget.getSpecialRegisters();

  Register Tmp = MRI.createVirtualRegister(RC);
  Register BCReg = MRI.createVirtualRegister(RC);

  MachineInstrBuilder MIB;

  const int64_t FPOffset = 0;
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t BCOffset = 2 * PVT.getStoreSize();
  const int64_t SPOffset = 3 * PVT.getStoreSize();
  const int64_t LPOffset = 4 * PVT.getStoreSize();

  MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), Tmp)
            .addReg(BufReg)
            .addImm(LabelOffset)
            .addReg(0);

  MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
                SpecialRegs->getFramePointerRegister())
            .addReg(BufReg)
            .addImm(FPOffset)
            .addReg(0);

  // We are restoring R13 even though we never stored it in setjmp from llvm,
  // as gcc always stores R13 in builtin_setjmp.  We could have mixed code:
  // gcc setjmp and llvm longjmp.
  MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), SystemZ::R13D)
            .addReg(BufReg)
            .addImm(LPOffset)
            .addReg(0);

  bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
  if (BackChain) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), BCReg)
              .addReg(BufReg)
              .addImm(BCOffset)
              .addReg(0);
  }

  MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
                SpecialRegs->getStackPointerRegister())
            .addReg(BufReg)
            .addImm(SPOffset)
            .addReg(0);

  if (BackChain) {
    auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
    BuildMI(*MBB, MI, DL, TII->get(SystemZ::STG))
        .addReg(BCReg)
        .addReg(SpecialRegs->getStackPointerRegister())
        .addImm(TFL->getBackchainOffset(*MF))
        .addReg(0);
  }

  MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BR)).addReg(Tmp);

  MI.eraseFromParent();
  return MBB;
}

/// Returns true if stack probing through inline assembly is requested.
bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
  // If the function specifically requests inline stack probes, emit them.
  if (MF.getFunction().hasFnAttribute("probe-stack"))
    return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
           "inline-asm";
  return false;
}
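
// For reference (a sketch, not from the upstream file): this corresponds to
// IR such as
//   define void @f() "probe-stack"="inline-asm" { ... }
// which is what Clang emits under -fstack-clash-protection.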

TargetLowering::AtomicExpansionKind
SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
  return AtomicExpansionKind::None;
}

TargetLowering::AtomicExpansionKind
SystemZTargetLowering::shouldCastAtomicStoreInIR(StoreInst *SI) const {
  return AtomicExpansionKind::None;
}

TargetLowering::AtomicExpansionKind
SystemZTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
  // Don't expand subword operations as they require special treatment.
  if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
    return AtomicExpansionKind::None;

  // Don't expand if there is a target instruction available.
  if (Subtarget.hasInterlockedAccess1() &&
      (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
      (RMW->getOperation() == AtomicRMWInst::BinOp::Add ||
       RMW->getOperation() == AtomicRMWInst::BinOp::Sub ||
       RMW->getOperation() == AtomicRMWInst::BinOp::And ||
       RMW->getOperation() == AtomicRMWInst::BinOp::Or ||
       RMW->getOperation() == AtomicRMWInst::BinOp::Xor))
    return AtomicExpansionKind::None;

  return AtomicExpansionKind::CmpXChg;
}

bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // We can use CGFI or CLGFI.
  return isInt<32>(Imm) || isUInt<32>(Imm);
}

bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  // We can use ALGFI or SLGFI.
  return isUInt<32>(Imm) || isUInt<32>(-Imm);
}

bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
  // Unaligned accesses should never be slower than the expanded version.
  // We check specifically for aligned accesses in the few cases where
  // they are required.
  if (Fast)
    *Fast = 1;
  return true;
}

// Information about the addressing mode for a memory access.
struct AddressingMode {
  // True if a long displacement is supported.
  bool LongDisplacement;

  // True if use of index register is supported.
  bool IndexReg;

  AddressingMode(bool LongDispl, bool IdxReg) :
    LongDisplacement(LongDispl), IndexReg(IdxReg) {}
};

// Return the desired addressing mode for a Load which has only one use (in
// the same block) which is a Store.
static AddressingMode getLoadStoreAddrMode(bool HasVector,
                                           Type *Ty) {
  // With vector support a Load->Store combination may be combined to either
  // an MVC or vector operations and it seems to work best to allow the
  // vector addressing mode.
  if (HasVector)
    return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);

  // Otherwise only the MVC case is special.
  bool MVC = Ty->isIntegerTy(8);
  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
}

// Return the addressing mode which seems most desirable given an LLVM
// Instruction pointer.
static AddressingMode
supportedAddressingMode(Instruction *I, bool HasVector) {
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::memset:
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
    }
  }

  if (isa<LoadInst>(I) && I->hasOneUse()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    if (SingleUser->getParent() == I->getParent()) {
      if (isa<ICmpInst>(SingleUser)) {
        if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
          if (C->getBitWidth() <= 64 &&
              (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
            // Comparison of memory with 16 bit signed / unsigned immediate
            return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
      } else if (isa<StoreInst>(SingleUser))
        // Load->Store
        return getLoadStoreAddrMode(HasVector, I->getType());
    }
  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
    if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
      if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
        // Load->Store
        return getLoadStoreAddrMode(HasVector, LoadI->getType());
  }

  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {

    // * Use LDE instead of LE/LEY for z13 to avoid partial register
    //   dependencies (LDE only supports small offsets).
    // * Utilize the vector registers to hold floating point
    //   values (vector load / store instructions only support small
    //   offsets).

    Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
                         I->getOperand(0)->getType());
    bool IsFPAccess = MemAccessTy->isFloatingPointTy();
    bool IsVectorAccess = MemAccessTy->isVectorTy();

    // A store of an extracted vector element will be combined into a VSTE type
    // instruction.
    if (!IsVectorAccess && isa<StoreInst>(I)) {
      Value *DataOp = I->getOperand(0);
      if (isa<ExtractElementInst>(DataOp))
        IsVectorAccess = true;
    }

    // A load which gets inserted into a vector element will be combined into a
    // VLE type instruction.
    if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
      User *LoadUser = *I->user_begin();
      if (isa<InsertElementInst>(LoadUser))
        IsVectorAccess = true;
    }

    if (IsFPAccess || IsVectorAccess)
      return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
  }

  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
}
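
// Example (a sketch, not from the upstream file): a scalar double load is
// classified as an FP access, so it reports (LongDispl=false, IdxReg=true),
// and isLegalAddressingMode() below will then reject displacements that do
// not fit the unsigned 12-bit field of the short-displacement FP/vector
// load forms.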

bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
       const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
  // Punt on globals for now, although they can be used in limited
  // RELATIVE LONG cases.
  if (AM.BaseGV)
    return false;

  // Require a 20-bit signed offset.
  if (!isInt<20>(AM.BaseOffs))
    return false;

  bool RequireD12 =
      Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
  AddressingMode SupportedAM(!RequireD12, true);
  if (I != nullptr)
    SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());

  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
    return false;

  if (!SupportedAM.IndexReg)
    // No indexing allowed.
    return AM.Scale == 0;
  else
    // Indexing is OK but no scale factor can be applied.
    return AM.Scale == 0 || AM.Scale == 1;
}
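
// Concrete cases (a sketch, not from the upstream file): for a plain i64
// access, base + index + 100000 is accepted (the offset fits in a signed
// 20-bit displacement and the index has scale 1), while base + 2*index is
// rejected because no SystemZ addressing form scales the index register.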

bool SystemZTargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  const int MVCFastLen = 16;

  if (Limit != ~unsigned(0)) {
    // Don't expand Op into scalar loads/stores in these cases:
    if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
      return false; // Small memcpy: Use MVC
    if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
      return false; // Small memset (first byte with STC/MVI): Use MVC
    if (Op.isZeroMemset())
      return false; // Memset zero: Use XC
  }

  return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
                                                  SrcAS, FuncAttributes);
}
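
// Effect (a sketch, not from the upstream file): memcpy(dst, src, 16) stays
// a single MVC, and memset(dst, 0, n) is cleared with XC rather than being
// expanded into a run of scalar stores; larger or variable-byte cases fall
// back to the generic lowering.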

EVT SystemZTargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
}

bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
    return false;
  unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
  unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
  return FromBits > ToBits;
}

bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
  if (!FromVT.isInteger() || !ToVT.isInteger())
    return false;
  unsigned FromBits = FromVT.getFixedSizeInBits();
  unsigned ToBits = ToVT.getFixedSizeInBits();
  return FromBits > ToBits;
}

//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'a': // Address register
    case 'd': // Data register (equivalent to 'r')
    case 'f': // Floating-point register
    case 'h': // High-part register
    case 'r': // General-purpose register
    case 'v': // Vector register
      return C_RegisterClass;

    case 'Q': // Memory with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Memory with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
    case 'm': // Equivalent to 'T'.
      return C_Memory;

    case 'I': // Unsigned 8-bit constant
    case 'J': // Unsigned 12-bit constant
    case 'K': // Signed 16-bit constant
    case 'L': // Signed 20-bit displacement (on all targets we support)
    case 'M': // 0x7fffffff
      return C_Immediate;

    default:
      break;
    }
  } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
    switch (Constraint[1]) {
    case 'Q': // Address with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Address with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
      return C_Address;

    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}
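
// Usage example (a sketch, not from the upstream file): in C these
// constraints appear as
//   asm("ahi %0,%1" : "+d"(x) : "K"(42));
// where 'd' requests a general-purpose data register and 'K' a signed
// 16-bit immediate, matching the classification above.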

TargetLowering::ConstraintWeight SystemZTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo &info,
                               const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;

  case 'a': // Address register
  case 'd': // Data register (equivalent to 'r')
  case 'h': // High-part register
  case 'r': // General-purpose register
    weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
    break;

  case 'f': // Floating-point register
    if (!useSoftFloat())
      weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
    break;

  case 'v': // Vector register
    if (Subtarget.hasVector())
      weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
                                                                 : CW_Default;
    break;

  case 'I': // Unsigned 8-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<8>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'J': // Unsigned 12-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<12>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'K': // Signed 16-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<16>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'L': // Signed 20-bit displacement (on all targets we support)
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<20>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'M': // 0x7fffffff
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (C->getZExtValue() == 0x7fffffff)
        weight = CW_Constant;
    break;
  }
  return weight;
}

// Parse a "{tNNN}" register constraint for which the register type "t"
// has already been verified.  MC is the class associated with "t" and
// Map maps 0-based register numbers to LLVM register numbers.
static std::pair<unsigned, const TargetRegisterClass *>
parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
                    const unsigned *Map, unsigned Size) {
  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
  if (isdigit(Constraint[2])) {
    unsigned Index;
    bool Failed =
        Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
    if (!Failed && Index < Size && Map[Index])
      return std::make_pair(Map[Index], RC);
  }
  return std::make_pair(0U, nullptr);
}
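
// Example (a sketch, not from the upstream file): the constraint "{r5}"
// with a 64-bit operand reaches this helper via the GR64 branch below and
// resolves to SystemZ::R5D, the 64-bit name of general-purpose register 5.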

std::pair<unsigned, const TargetRegisterClass *>
SystemZTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'd': // Data register (equivalent to 'r')
    case 'r': // General-purpose register
      if (VT.getSizeInBits() == 64)
        return std::make_pair(0U, &SystemZ::GR64BitRegClass);
      else if (VT.getSizeInBits() == 128)
        return std::make_pair(0U, &SystemZ::GR128BitRegClass);
      return std::make_pair(0U, &SystemZ::GR32BitRegClass);

    case 'a': // Address register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
      return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);

    case 'h': // High-part register (an LLVM extension)
      return std::make_pair(0U, &SystemZ::GRH32BitRegClass);

    case 'f': // Floating-point register
      if (!useSoftFloat()) {
        if (VT.getSizeInBits() == 64)
          return std::make_pair(0U, &SystemZ::FP64BitRegClass);
        else if (VT.getSizeInBits() == 128)
          return std::make_pair(0U, &SystemZ::FP128BitRegClass);
        return std::make_pair(0U, &SystemZ::FP32BitRegClass);
      }
      break;

    case 'v': // Vector register
      if (Subtarget.hasVector()) {
        if (VT.getSizeInBits() == 32)
          return std::make_pair(0U, &SystemZ::VR32BitRegClass);
        if (VT.getSizeInBits() == 64)
          return std::make_pair(0U, &SystemZ::VR64BitRegClass);
        return std::make_pair(0U, &SystemZ::VR128BitRegClass);
      }
      break;
    }
  }
  if (Constraint.starts_with("{")) {

    // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
    // to check the size on.
    auto getVTSizeInBits = [&VT]() {
      return VT == MVT::Other ? 0 : VT.getSizeInBits();
    };

    // We need to override the default register parsing for GPRs and FPRs
    // because the interpretation depends on VT.  The internal names of
    // the registers are also different from the external names
    // (F0D and F0S instead of F0, etc.).
    if (Constraint[1] == 'r') {
      if (getVTSizeInBits() == 32)
        return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
                                   SystemZMC::GR32Regs, 16);
      if (getVTSizeInBits() == 128)
        return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
                                   SystemZMC::GR128Regs, 16);
      return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
                                 SystemZMC::GR64Regs, 16);
    }
    if (Constraint[1] == 'f') {
      if (useSoftFloat())
        return std::make_pair(
            0u, static_cast<const TargetRegisterClass *>(nullptr));
      if (getVTSizeInBits() == 32)
        return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
                                   SystemZMC::FP32Regs, 16);
      if (getVTSizeInBits() == 128)
        return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
                                   SystemZMC::FP128Regs, 16);
      return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
                                 SystemZMC::FP64Regs, 16);
    }
    if (Constraint[1] == 'v') {
      if (!Subtarget.hasVector())
        return std::make_pair(
            0u, static_cast<const TargetRegisterClass *>(nullptr));
      if (getVTSizeInBits() == 32)
        return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
                                   SystemZMC::VR32Regs, 32);
      if (getVTSizeInBits() == 64)
        return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
                                   SystemZMC::VR64Regs, 32);
      return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
                                 SystemZMC::VR128Regs, 32);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register
SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                         const MachineFunction &MF) const {
  Register Reg =
      StringSwitch<Register>(RegName)
          .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D
                                                   : SystemZ::NoRegister)
          .Case("r15",
                Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister)
          .Default(SystemZ::NoRegister);

  if (Reg)
    return Reg;
  report_fatal_error("Invalid register name global variable");
}

Register SystemZTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
}

Register SystemZTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
}

void SystemZTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Only support length 1 constraints for now.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'I': // Unsigned 8-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<8>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'J': // Unsigned 12-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<12>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'K': // Signed 16-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<16>(C->getSExtValue()))
          Ops.push_back(DAG.getSignedTargetConstant(
              C->getSExtValue(), SDLoc(Op), Op.getValueType()));
      return;

    case 'L': // Signed 20-bit displacement (on all targets we support)
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<20>(C->getSExtValue()))
          Ops.push_back(DAG.getSignedTargetConstant(
              C->getSExtValue(), SDLoc(Op), Op.getValueType()));
      return;

    case 'M': // 0x7fffffff
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0x7fffffff)
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

//===----------------------------------------------------------------------===//
// Calling conventions
//===----------------------------------------------------------------------===//

#include "SystemZGenCallingConv.inc"

const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
    CallingConv::ID) const {
  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
                                           SystemZ::R14D, 0 };
  return ScratchRegs;
}

bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
                                                     Type *ToType) const {
  return isTruncateFree(FromType, ToType);
}

bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}

// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()).  Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
                                   CCValAssign &VA, SDValue Chain,
                                   SDValue Value) {
  // If the argument has been promoted from a smaller type, insert an
  // assertion to capture this.
  if (VA.getLocInfo() == CCValAssign::SExt)
    Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));
  else if (VA.getLocInfo() == CCValAssign::ZExt)
    Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));

  if (VA.isExtInLoc())
    Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
  else if (VA.getLocInfo() == CCValAssign::BCvt) {
    // If this is a short vector argument loaded from the stack,
    // extend from i64 to full vector size and then bitcast.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
    Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
  } else
    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
  return Value;
}

// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA.  Return a copy of Value converted to
// VA.getValVT().  The caller is responsible for handling indirect values.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
                                   CCValAssign &VA, SDValue Value) {
  switch (VA.getLocInfo()) {
  case CCValAssign::SExt:
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::ZExt:
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::AExt:
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::BCvt: {
    assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
    assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
           VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
    // For an f32 vararg we need to first promote it to an f64 and then
    // bitcast it to an i64.
    if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
      Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
    MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
                            ? MVT::v2i64
                            : VA.getLocVT();
    Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
    // For ELF, this is a short vector argument to be stored to the stack,
    // bitcast to v2i64 and then extract first element.
    if (BitCastToType == MVT::v2i64)
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
                         DAG.getConstant(0, DL, MVT::i32));
    return Value;
  }
  case CCValAssign::Full:
    return Value;
  default:
    llvm_unreachable("Unhandled getLocInfo()");
  }
}
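
// Concrete case (a sketch, not from the upstream file): passing 1.5f as a
// vararg takes the BCvt path above: the f32 is extended to f64, bitcast to
// i64, and then passed in a GPR slot, as the ELF ABI requires for variadic
// floating-point arguments.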

static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
  SDLoc DL(In);
  SDValue Lo, Hi;
  if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
    Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
    Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
                     DAG.getNode(ISD::SRL, DL, MVT::i128, In,
                                 DAG.getConstant(64, DL, MVT::i32)));
  } else {
    std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
  }

  // FIXME: If v2i64 were a legal type, we could use it instead of
  // Untyped here.  This might enable improved folding.
  SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
                                    MVT::Untyped, Hi, Lo);
  return SDValue(Pair, 0);
}

static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
  SDLoc DL(In);
  SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
                                          DL, MVT::i64, In);
  SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
                                          DL, MVT::i64, In);

  if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
    Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
                     DAG.getConstant(64, DL, MVT::i32));
    return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
  } else {
    return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
  }
}

bool SystemZTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  EVT ValueVT = Val.getValueType();
  if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
    // Inline assembly operand.
    Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
    return true;
  }

  return false;
}

SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
  if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
    // Inline assembly operand.
    SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
    return DAG.getBitcast(ValueVT, Res);
  }

  return SDValue();
}

SDValue SystemZTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SystemZMachineFunctionInfo *FuncInfo =
      MF.getInfo<SystemZMachineFunctionInfo>();
  auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
  FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());

  unsigned NumFixedGPRs = 0;
  unsigned NumFixedFPRs = 0;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    SDValue ArgValue;
    CCValAssign &VA = ArgLocs[I];
    EVT LocVT = VA.getLocVT();
    if (VA.isRegLoc()) {
      // Arguments passed in registers
      const TargetRegisterClass *RC;
      switch (LocVT.getSimpleVT().SimpleTy) {
      default:
        // Integers smaller than i64 should be promoted to i64.
        llvm_unreachable("Unexpected argument type");
      case MVT::i32:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR32BitRegClass;
        break;
      case MVT::i64:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR64BitRegClass;
        break;
      case MVT::f32:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP32BitRegClass;
        break;
      case MVT::f64:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP64BitRegClass;
        break;
      case MVT::f128:
        NumFixedFPRs += 2;
        RC = &SystemZ::FP128BitRegClass;
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        RC = &SystemZ::VR128BitRegClass;
        break;
      }

      Register VReg = MRI.createVirtualRegister(RC);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Create the frame index object for this incoming parameter.
      // FIXME: Pre-include call frame size in the offset, should not
      // need to manually add it here.
      int64_t ArgSPOffset = VA.getLocMemOffset();
      if (Subtarget.isTargetXPLINK64()) {
        auto &XPRegs =
            Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
        ArgSPOffset += XPRegs.getCallFrameSize();
      }
      int FI =
          MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter.  Unpromoted ints and floats are
      // passed as right-justified 8-byte values.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
                          DAG.getIntPtrConstant(4, DL));
      ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
                             MachinePointerInfo::getFixedStack(MF, FI));
    }

    // Convert the value of the argument register into the value that's
    // being passed.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      // If the original argument was split (e.g. i128), we need
      // to load all parts of it here (using the same address).
      unsigned ArgIndex = Ins[I].OrigArgIndex;
      assert (Ins[I].PartOffset == 0);
      while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[I + 1];
        unsigned PartOffset = Ins[I + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++I;
      }
    } else
      InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
  }

  if (IsVarArg && Subtarget.isTargetXPLINK64()) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    auto *Regs = static_cast<SystemZXPLINK64Registers *>(
        Subtarget.getSpecialRegisters());

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be.  The 1-byte size here is arbitrary.
    // FIXME: Pre-include call frame size in the offset, should not
    // need to manually add it here.
    int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
    int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
    FuncInfo->setVarArgsFrameIndex(FI);
  }

  if (IsVarArg && Subtarget.isTargetELF()) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be.  The 1-byte size here is arbitrary.
    int64_t VarArgsOffset = CCInfo.getStackSize();
    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateFixedObject(1, VarArgsOffset, true));

    // ...and a similar frame index for the caller-allocated save area
    // that will be used to store the incoming registers.
    int64_t RegSaveOffset =
        -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
    unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
    FuncInfo->setRegSaveFrameIndex(RegSaveIndex);

    // Store the FPR varargs in the reserved frame slots.  (We store the
    // GPRs as part of the prologue.)
    if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
      SDValue MemOps[SystemZ::ELFNumArgFPRs];
      for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
        unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
        int FI =
            MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
        Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
                                     &SystemZ::FP64BitRegClass);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
        MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
                                 MachinePointerInfo::getFixedStack(MF, FI));
      }
      // Join the stores, which are independent of one another.
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          ArrayRef(&MemOps[NumFixedFPRs],
                                   SystemZ::ELFNumArgFPRs - NumFixedFPRs));
    }
  }

  if (Subtarget.isTargetXPLINK64()) {
    // Create virtual register for handling incoming "ADA" special register (R5)
    const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
    Register ADAvReg = MRI.createVirtualRegister(RC);
    auto *Regs = static_cast<SystemZXPLINK64Registers *>(
        Subtarget.getSpecialRegisters());
    MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
    FuncInfo->setADAVirtualRegister(ADAvReg);
  }
  return Chain;
}
2040
2041static bool canUseSiblingCall(const CCState &ArgCCInfo,
2044 // Punt if there are any indirect or stack arguments, or if the call
2045 // needs the callee-saved argument register R6, or if the call uses
2046 // the callee-saved register arguments SwiftSelf and SwiftError.
2047 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2048     CCValAssign &VA = ArgLocs[I];
2049     if (VA.getLocInfo() == CCValAssign::Indirect)
2050       return false;
2051 if (!VA.isRegLoc())
2052 return false;
2053 Register Reg = VA.getLocReg();
2054 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
2055 return false;
2056 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
2057 return false;
2058 }
2059 return true;
2060}
2061
2062 static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL,
2063                            unsigned Offset, bool LoadAdr = false) {
2064   MachineFunction &MF = DAG.getMachineFunction();
2065   SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
2066   unsigned ADAvReg = MFI->getADAVirtualRegister();
2067   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2068
2069 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
2070 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
2071
2072 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
2073 if (!LoadAdr)
2074 Result = DAG.getLoad(
2075 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
2076         MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant);
2077
2078 return Result;
2079}
2080
2081// ADA access using Global value
2082// Note: for functions, address of descriptor is returned
2083 static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL,
2084                            EVT PtrVT) {
2085 unsigned ADAtype;
2086 bool LoadAddr = false;
2087 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
2088 bool IsFunction =
2089 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
2090 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
2091
2092   if (IsFunction) {
2093     if (IsInternal) {
2094       ADAtype = SystemZII::MO_ADA_DIRECT_FUNC_DESC;
2095       LoadAddr = true;
2096     } else
2097       ADAtype = SystemZII::MO_ADA_INDIRECT_FUNC_DESC;
2098   } else {
2099     ADAtype = SystemZII::MO_ADA_DATA_SYMBOL_ADDR;
2100   }
2101 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
2102
2103 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
2104}
2105
2106static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
2107 SDLoc &DL, SDValue &Chain) {
2108 unsigned ADADelta = 0; // ADA offset in desc.
2109 unsigned EPADelta = 8; // EPA offset in desc.
2110   MachineFunction &MF = DAG.getMachineFunction();
2111   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
2112
2113 // XPLink calling convention.
2114 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2115 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
2116 G->getGlobal()->hasPrivateLinkage());
2117 if (IsInternal) {
2118       SystemZMachineFunctionInfo *MFI =
2119           MF.getInfo<SystemZMachineFunctionInfo>();
2120       unsigned ADAvReg = MFI->getADAVirtualRegister();
2121 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
2122 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2123 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2124 return true;
2125 } else {
2126       SDValue GA = DAG.getTargetGlobalAddress(
2127           G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2128 ADA = getADAEntry(DAG, GA, DL, ADADelta);
2129 Callee = getADAEntry(DAG, GA, DL, EPADelta);
2130 }
2131 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2132     SDValue ES = DAG.getTargetExternalSymbol(
2133         E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2134 ADA = getADAEntry(DAG, ES, DL, ADADelta);
2135 Callee = getADAEntry(DAG, ES, DL, EPADelta);
2136 } else {
2137 // Function pointer case
2138 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2139 DAG.getConstant(ADADelta, DL, PtrVT));
2140 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
2141                       MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2142     Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2143 DAG.getConstant(EPADelta, DL, PtrVT));
2144 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
2145                          MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2146   }
2147 return false;
2148}
2149
2150SDValue
2151 SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
2152                                  SmallVectorImpl<SDValue> &InVals) const {
2153 SelectionDAG &DAG = CLI.DAG;
2154 SDLoc &DL = CLI.DL;
2155   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2156   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2157   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2158 SDValue Chain = CLI.Chain;
2159 SDValue Callee = CLI.Callee;
2160 bool &IsTailCall = CLI.IsTailCall;
2161 CallingConv::ID CallConv = CLI.CallConv;
2162 bool IsVarArg = CLI.IsVarArg;
2163   MachineFunction &MF = DAG.getMachineFunction();
2164   EVT PtrVT = getPointerTy(MF.getDataLayout());
2165 LLVMContext &Ctx = *DAG.getContext();
2166   SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
2167
2168   // FIXME: z/OS support to be added later.
2169 if (Subtarget.isTargetXPLINK64())
2170 IsTailCall = false;
2171
2172 // Integer args <=32 bits should have an extension attribute.
2173 verifyNarrowIntegerArgs_Call(Outs, &MF.getFunction(), Callee);
2174
2175 // Analyze the operands of the call, assigning locations to each operand.
2176   SmallVector<CCValAssign, 16> ArgLocs;
2177   SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
2178 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
2179
2180 // We don't support GuaranteedTailCallOpt, only automatically-detected
2181 // sibling calls.
2182 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
2183 IsTailCall = false;
2184
2185 // Get a count of how many bytes are to be pushed on the stack.
2186 unsigned NumBytes = ArgCCInfo.getStackSize();
2187
2188 // Mark the start of the call.
2189 if (!IsTailCall)
2190 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
2191
2192 // Copy argument values to their designated locations.
2193   SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
2194   SmallVector<SDValue, 8> MemOpChains;
2195 SDValue StackPtr;
2196 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2197 CCValAssign &VA = ArgLocs[I];
2198 SDValue ArgValue = OutVals[I];
2199
2200 if (VA.getLocInfo() == CCValAssign::Indirect) {
2201 // Store the argument in a stack slot and pass its address.
2202 unsigned ArgIndex = Outs[I].OrigArgIndex;
2203 EVT SlotVT;
2204 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
2205 // Allocate the full stack space for a promoted (and split) argument.
2206 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
2207 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
2208 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
2209 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
2210 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
2211 } else {
2212 SlotVT = Outs[I].VT;
2213 }
2214 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
2215 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2216 MemOpChains.push_back(
2217           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
2218                        MachinePointerInfo::getFixedStack(MF, FI)));
2219 // If the original argument was split (e.g. i128), we need
2220 // to store all parts of it here (and pass just one address).
2221 assert (Outs[I].PartOffset == 0);
2222 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
2223 SDValue PartValue = OutVals[I + 1];
2224 unsigned PartOffset = Outs[I + 1].PartOffset;
2225 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
2226 DAG.getIntPtrConstant(PartOffset, DL));
2227 MemOpChains.push_back(
2228             DAG.getStore(Chain, DL, PartValue, Address,
2229                          MachinePointerInfo::getFixedStack(MF, FI, PartOffset)));
2230 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
2231 SlotVT.getStoreSize()) && "Not enough space for argument part!");
2232 ++I;
2233 }
2234 ArgValue = SpillSlot;
2235 } else
2236 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
2237
2238 if (VA.isRegLoc()) {
2239 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
2240 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
2241 // and low values.
2242 if (VA.getLocVT() == MVT::i128)
2243 ArgValue = lowerI128ToGR128(DAG, ArgValue);
2244 // Queue up the argument copies and emit them at the end.
2245 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2246 } else {
2247 assert(VA.isMemLoc() && "Argument not register or memory");
2248
2249 // Work out the address of the stack slot. Unpromoted ints and
2250 // floats are passed as right-justified 8-byte values.
2251 if (!StackPtr.getNode())
2252 StackPtr = DAG.getCopyFromReg(Chain, DL,
2253 Regs->getStackPointerRegister(), PtrVT);
2254 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
2255 VA.getLocMemOffset();
2256 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
2257 Offset += 4;
2258 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2259                                     DAG.getIntPtrConstant(Offset, DL));
2260
2261 // Emit the store.
2262 MemOpChains.push_back(
2263 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2264
2265 // Although long doubles or vectors are passed through the stack when
2266 // they are vararg (non-fixed arguments), if a long double or vector
2267 // occupies the third and fourth slot of the argument list GPR3 should
2268 // still shadow the third slot of the argument list.
2269 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2270 SDValue ShadowArgValue =
2271 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2272 DAG.getIntPtrConstant(1, DL));
2273 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2274 }
2275 }
2276 }
2277
2278 // Join the stores, which are independent of one another.
2279 if (!MemOpChains.empty())
2280 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2281
2282 // Accept direct calls by converting symbolic call addresses to the
2283 // associated Target* opcodes. Force %r1 to be used for indirect
2284 // tail calls.
2285 SDValue Glue;
2286
2287 if (Subtarget.isTargetXPLINK64()) {
2288 SDValue ADA;
2289 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2290 if (!IsBRASL) {
2291 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2292 ->getAddressOfCalleeRegister();
2293 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2294 Glue = Chain.getValue(1);
2295 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2296 }
2297 RegsToPass.push_back(std::make_pair(
2298 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2299 } else {
2300 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2301 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2302 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2303 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2304 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2305 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2306 } else if (IsTailCall) {
2307 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2308 Glue = Chain.getValue(1);
2309 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2310 }
2311 }
2312
2313 // Build a sequence of copy-to-reg nodes, chained and glued together.
2314 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
2315 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
2316 RegsToPass[I].second, Glue);
2317 Glue = Chain.getValue(1);
2318 }
2319
2320 // The first call operand is the chain and the second is the target address.
2321   SmallVector<SDValue, 8> Ops;
2322   Ops.push_back(Chain);
2323 Ops.push_back(Callee);
2324
2325 // Add argument registers to the end of the list so that they are
2326 // known live into the call.
2327 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
2328 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
2329 RegsToPass[I].second.getValueType()));
2330
2331 // Add a register mask operand representing the call-preserved registers.
2332 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2333 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2334 assert(Mask && "Missing call preserved mask for calling convention");
2335 Ops.push_back(DAG.getRegisterMask(Mask));
2336
2337 // Glue the call to the argument copies, if any.
2338 if (Glue.getNode())
2339 Ops.push_back(Glue);
2340
2341 // Emit the call.
2342 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2343 if (IsTailCall) {
2344 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2345 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2346 return Ret;
2347 }
2348 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2349 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2350 Glue = Chain.getValue(1);
2351
2352 // Mark the end of the call, which is glued to the call itself.
2353 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2354 Glue = Chain.getValue(1);
2355
2356 // Assign locations to each value returned by this call.
2357   SmallVector<CCValAssign, 16> RetLocs;
2358   CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2359 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2360
2361 // Copy all of the result registers out of their specified physreg.
2362 for (CCValAssign &VA : RetLocs) {
2363 // Copy the value out, gluing the copy to the end of the call sequence.
2364 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2365 VA.getLocVT(), Glue);
2366 Chain = RetValue.getValue(1);
2367 Glue = RetValue.getValue(2);
2368
2369 // Convert the value of the return register into the value that's
2370 // being returned.
2371 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2372 }
2373
2374 return Chain;
2375}
2376
2377// Generate a call taking the given operands as arguments and returning a
2378// result of type RetVT.
2379 std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
2380     SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2381 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2382 bool DoesNotReturn, bool IsReturnValueUsed) const {
2383   TargetLowering::ArgListTy Args;
2384   Args.reserve(Ops.size());
2385
2386   TargetLowering::ArgListEntry Entry;
2387   for (SDValue Op : Ops) {
2388 Entry.Node = Op;
2389 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
2390 Entry.IsSExt = shouldSignExtendTypeInLibCall(Entry.Ty, IsSigned);
2391 Entry.IsZExt = !Entry.IsSExt;
2392 Args.push_back(Entry);
2393 }
2394
2395 SDValue Callee =
2396 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2397
2398 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2399   TargetLowering::CallLoweringInfo CLI(DAG);
2400   bool SignExtend = shouldSignExtendTypeInLibCall(RetTy, IsSigned);
2401 CLI.setDebugLoc(DL)
2402 .setChain(Chain)
2403 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2404 .setNoReturn(DoesNotReturn)
2405 .setDiscardResult(!IsReturnValueUsed)
2406 .setSExtResult(SignExtend)
2407 .setZExtResult(!SignExtend);
2408 return LowerCallTo(CLI);
2409}
2410
2411 bool SystemZTargetLowering::
2412 CanLowerReturn(CallingConv::ID CallConv,
2413                MachineFunction &MF, bool isVarArg,
2414                const SmallVectorImpl<ISD::OutputArg> &Outs,
2415                LLVMContext &Context) const {
2416 // Special case that we cannot easily detect in RetCC_SystemZ since
2417 // i128 may not be a legal type.
2418 for (auto &Out : Outs)
2419 if (Out.ArgVT == MVT::i128)
2420 return false;
2421
2422   SmallVector<CCValAssign, 16> RetLocs;
2423   CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
2424 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2425}
2426
2427SDValue
2428 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2429                                    bool IsVarArg,
2430                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
2431                                    const SmallVectorImpl<SDValue> &OutVals,
2432                                    const SDLoc &DL, SelectionDAG &DAG) const {
2433   MachineFunction &MF = DAG.getMachineFunction();
2434
2435 // Integer args <=32 bits should have an extension attribute.
2436 verifyNarrowIntegerArgs_Ret(Outs, &MF.getFunction());
2437
2438 // Assign locations to each returned value.
2439   SmallVector<CCValAssign, 16> RetLocs;
2440   CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2441 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2442
2443 // Quick exit for void returns
2444 if (RetLocs.empty())
2445 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2446
2447 if (CallConv == CallingConv::GHC)
2448 report_fatal_error("GHC functions return void only");
2449
2450 // Copy the result values into the output registers.
2451 SDValue Glue;
2452   SmallVector<SDValue, 4> RetOps;
2453   RetOps.push_back(Chain);
2454 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2455 CCValAssign &VA = RetLocs[I];
2456 SDValue RetValue = OutVals[I];
2457
2458 // Make the return register live on exit.
2459 assert(VA.isRegLoc() && "Can only return in registers!");
2460
2461 // Promote the value as required.
2462 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2463
2464 // Chain and glue the copies together.
2465 Register Reg = VA.getLocReg();
2466 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2467 Glue = Chain.getValue(1);
2468 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2469 }
2470
2471 // Update chain and glue.
2472 RetOps[0] = Chain;
2473 if (Glue.getNode())
2474 RetOps.push_back(Glue);
2475
2476 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2477}
2478
2479// Return true if Op is an intrinsic node with chain that returns the CC value
2480// as its only (other) argument. Provide the associated SystemZISD opcode and
2481// the mask of valid CC values if so.
2482static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2483 unsigned &CCValid) {
2484 unsigned Id = Op.getConstantOperandVal(1);
2485 switch (Id) {
2486 case Intrinsic::s390_tbegin:
2487 Opcode = SystemZISD::TBEGIN;
2488 CCValid = SystemZ::CCMASK_TBEGIN;
2489 return true;
2490
2491 case Intrinsic::s390_tbegin_nofloat:
2492     Opcode = SystemZISD::TBEGIN_NOFLOAT;
2493     CCValid = SystemZ::CCMASK_TBEGIN;
2494 return true;
2495
2496 case Intrinsic::s390_tend:
2497 Opcode = SystemZISD::TEND;
2498 CCValid = SystemZ::CCMASK_TEND;
2499 return true;
2500
2501 default:
2502 return false;
2503 }
2504}
2505
2506// Return true if Op is an intrinsic node without chain that returns the
2507// CC value as its final argument. Provide the associated SystemZISD
2508// opcode and the mask of valid CC values if so.
2509static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2510 unsigned Id = Op.getConstantOperandVal(0);
2511 switch (Id) {
2512 case Intrinsic::s390_vpkshs:
2513 case Intrinsic::s390_vpksfs:
2514 case Intrinsic::s390_vpksgs:
2515 Opcode = SystemZISD::PACKS_CC;
2516 CCValid = SystemZ::CCMASK_VCMP;
2517 return true;
2518
2519 case Intrinsic::s390_vpklshs:
2520 case Intrinsic::s390_vpklsfs:
2521 case Intrinsic::s390_vpklsgs:
2522 Opcode = SystemZISD::PACKLS_CC;
2523 CCValid = SystemZ::CCMASK_VCMP;
2524 return true;
2525
2526 case Intrinsic::s390_vceqbs:
2527 case Intrinsic::s390_vceqhs:
2528 case Intrinsic::s390_vceqfs:
2529 case Intrinsic::s390_vceqgs:
2530 Opcode = SystemZISD::VICMPES;
2531 CCValid = SystemZ::CCMASK_VCMP;
2532 return true;
2533
2534 case Intrinsic::s390_vchbs:
2535 case Intrinsic::s390_vchhs:
2536 case Intrinsic::s390_vchfs:
2537 case Intrinsic::s390_vchgs:
2538 Opcode = SystemZISD::VICMPHS;
2539 CCValid = SystemZ::CCMASK_VCMP;
2540 return true;
2541
2542 case Intrinsic::s390_vchlbs:
2543 case Intrinsic::s390_vchlhs:
2544 case Intrinsic::s390_vchlfs:
2545 case Intrinsic::s390_vchlgs:
2546 Opcode = SystemZISD::VICMPHLS;
2547 CCValid = SystemZ::CCMASK_VCMP;
2548 return true;
2549
2550 case Intrinsic::s390_vtm:
2551 Opcode = SystemZISD::VTM;
2552 CCValid = SystemZ::CCMASK_VCMP;
2553 return true;
2554
2555 case Intrinsic::s390_vfaebs:
2556 case Intrinsic::s390_vfaehs:
2557 case Intrinsic::s390_vfaefs:
2558 Opcode = SystemZISD::VFAE_CC;
2559 CCValid = SystemZ::CCMASK_ANY;
2560 return true;
2561
2562 case Intrinsic::s390_vfaezbs:
2563 case Intrinsic::s390_vfaezhs:
2564 case Intrinsic::s390_vfaezfs:
2565 Opcode = SystemZISD::VFAEZ_CC;
2566 CCValid = SystemZ::CCMASK_ANY;
2567 return true;
2568
2569 case Intrinsic::s390_vfeebs:
2570 case Intrinsic::s390_vfeehs:
2571 case Intrinsic::s390_vfeefs:
2572 Opcode = SystemZISD::VFEE_CC;
2573 CCValid = SystemZ::CCMASK_ANY;
2574 return true;
2575
2576 case Intrinsic::s390_vfeezbs:
2577 case Intrinsic::s390_vfeezhs:
2578 case Intrinsic::s390_vfeezfs:
2579 Opcode = SystemZISD::VFEEZ_CC;
2580 CCValid = SystemZ::CCMASK_ANY;
2581 return true;
2582
2583 case Intrinsic::s390_vfenebs:
2584 case Intrinsic::s390_vfenehs:
2585 case Intrinsic::s390_vfenefs:
2586 Opcode = SystemZISD::VFENE_CC;
2587 CCValid = SystemZ::CCMASK_ANY;
2588 return true;
2589
2590 case Intrinsic::s390_vfenezbs:
2591 case Intrinsic::s390_vfenezhs:
2592 case Intrinsic::s390_vfenezfs:
2593 Opcode = SystemZISD::VFENEZ_CC;
2594 CCValid = SystemZ::CCMASK_ANY;
2595 return true;
2596
2597 case Intrinsic::s390_vistrbs:
2598 case Intrinsic::s390_vistrhs:
2599 case Intrinsic::s390_vistrfs:
2600 Opcode = SystemZISD::VISTR_CC;
2601     CCValid = SystemZ::CCMASK_ANY;
2602     return true;
2603
2604 case Intrinsic::s390_vstrcbs:
2605 case Intrinsic::s390_vstrchs:
2606 case Intrinsic::s390_vstrcfs:
2607 Opcode = SystemZISD::VSTRC_CC;
2608 CCValid = SystemZ::CCMASK_ANY;
2609 return true;
2610
2611 case Intrinsic::s390_vstrczbs:
2612 case Intrinsic::s390_vstrczhs:
2613 case Intrinsic::s390_vstrczfs:
2614 Opcode = SystemZISD::VSTRCZ_CC;
2615 CCValid = SystemZ::CCMASK_ANY;
2616 return true;
2617
2618 case Intrinsic::s390_vstrsb:
2619 case Intrinsic::s390_vstrsh:
2620 case Intrinsic::s390_vstrsf:
2621 Opcode = SystemZISD::VSTRS_CC;
2622 CCValid = SystemZ::CCMASK_ANY;
2623 return true;
2624
2625 case Intrinsic::s390_vstrszb:
2626 case Intrinsic::s390_vstrszh:
2627 case Intrinsic::s390_vstrszf:
2628 Opcode = SystemZISD::VSTRSZ_CC;
2629 CCValid = SystemZ::CCMASK_ANY;
2630 return true;
2631
2632 case Intrinsic::s390_vfcedbs:
2633 case Intrinsic::s390_vfcesbs:
2634 Opcode = SystemZISD::VFCMPES;
2635 CCValid = SystemZ::CCMASK_VCMP;
2636 return true;
2637
2638 case Intrinsic::s390_vfchdbs:
2639 case Intrinsic::s390_vfchsbs:
2640 Opcode = SystemZISD::VFCMPHS;
2641 CCValid = SystemZ::CCMASK_VCMP;
2642 return true;
2643
2644 case Intrinsic::s390_vfchedbs:
2645 case Intrinsic::s390_vfchesbs:
2646 Opcode = SystemZISD::VFCMPHES;
2647 CCValid = SystemZ::CCMASK_VCMP;
2648 return true;
2649
2650 case Intrinsic::s390_vftcidb:
2651 case Intrinsic::s390_vftcisb:
2652 Opcode = SystemZISD::VFTCI;
2653 CCValid = SystemZ::CCMASK_VCMP;
2654 return true;
2655
2656 case Intrinsic::s390_tdc:
2657 Opcode = SystemZISD::TDC;
2658 CCValid = SystemZ::CCMASK_TDC;
2659 return true;
2660
2661 default:
2662 return false;
2663 }
2664}
2665
2666// Emit an intrinsic with chain and an explicit CC register result.
2667 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2668                                            unsigned Opcode) {
2669 // Copy all operands except the intrinsic ID.
2670 unsigned NumOps = Op.getNumOperands();
2671   SmallVector<SDValue, 6> Ops;
2672   Ops.reserve(NumOps - 1);
2673 Ops.push_back(Op.getOperand(0));
2674 for (unsigned I = 2; I < NumOps; ++I)
2675 Ops.push_back(Op.getOperand(I));
2676
2677 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2678 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2679 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2680 SDValue OldChain = SDValue(Op.getNode(), 1);
2681 SDValue NewChain = SDValue(Intr.getNode(), 1);
2682 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2683 return Intr.getNode();
2684}
2685
2686// Emit an intrinsic with an explicit CC register result.
2687 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2688                                    unsigned Opcode) {
2689 // Copy all operands except the intrinsic ID.
2690 unsigned NumOps = Op.getNumOperands();
2691   SmallVector<SDValue, 6> Ops;
2692   Ops.reserve(NumOps - 1);
2693 for (unsigned I = 1; I < NumOps; ++I)
2694 Ops.push_back(Op.getOperand(I));
2695
2696 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2697 return Intr.getNode();
2698}
2699
2700// CC is a comparison that will be implemented using an integer or
2701// floating-point comparison. Return the condition code mask for
2702// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2703// unsigned comparisons and clear for signed ones. In the floating-point
2704// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2705 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2706 #define CONV(X) \
2707 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2708 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2709 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2710
2711 switch (CC) {
2712 default:
2713 llvm_unreachable("Invalid integer condition!");
2714
2715 CONV(EQ);
2716 CONV(NE);
2717 CONV(GT);
2718 CONV(GE);
2719 CONV(LT);
2720 CONV(LE);
2721
2722 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2723   case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2724   }
2725#undef CONV
2726}
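
// For example, CONV(LT) expands to:
//   case ISD::SETLT:  return SystemZ::CCMASK_CMP_LT;
//   case ISD::SETOLT: return SystemZ::CCMASK_CMP_LT;
//   case ISD::SETULT: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_LT;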
2727
2728// If C can be converted to a comparison against zero, adjust the operands
2729// as necessary.
2730static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2731 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2732 return;
2733
2734 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2735 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2736 return;
2737
2738 int64_t Value = ConstOp1->getSExtValue();
2739 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2740 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2741 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2742 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2743 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2744 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2745 }
2746}
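
// For example, the signed comparison "x > -1" matches the first clause:
// CCMask is XORed with CCMASK_CMP_EQ, turning CMP_GT into CMP_GE, and the
// test becomes "x >= 0", a comparison with zero that later code can
// optimize more freely.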
2747
2748// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2749// adjust the operands as necessary.
2750static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2751 Comparison &C) {
2752   // For us to make any changes, it must be a comparison between a single-use
2753 // load and a constant.
2754 if (!C.Op0.hasOneUse() ||
2755 C.Op0.getOpcode() != ISD::LOAD ||
2756 C.Op1.getOpcode() != ISD::Constant)
2757 return;
2758
2759 // We must have an 8- or 16-bit load.
2760 auto *Load = cast<LoadSDNode>(C.Op0);
2761 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2762 if ((NumBits != 8 && NumBits != 16) ||
2763 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2764 return;
2765
2766 // The load must be an extending one and the constant must be within the
2767 // range of the unextended value.
2768 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2769 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2770 return;
2771 uint64_t Value = ConstOp1->getZExtValue();
2772 uint64_t Mask = (1 << NumBits) - 1;
2773 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2774 // Make sure that ConstOp1 is in range of C.Op0.
2775 int64_t SignedValue = ConstOp1->getSExtValue();
2776 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2777 return;
2778 if (C.ICmpType != SystemZICMP::SignedOnly) {
2779 // Unsigned comparison between two sign-extended values is equivalent
2780 // to unsigned comparison between two zero-extended values.
2781 Value &= Mask;
2782 } else if (NumBits == 8) {
2783 // Try to treat the comparison as unsigned, so that we can use CLI.
2784 // Adjust CCMask and Value as necessary.
2785 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2786 // Test whether the high bit of the byte is set.
2787 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2788 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2789 // Test whether the high bit of the byte is clear.
2790 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2791 else
2792 // No instruction exists for this combination.
2793 return;
2794 C.ICmpType = SystemZICMP::UnsignedOnly;
2795 }
2796 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2797 if (Value > Mask)
2798 return;
2799 // If the constant is in range, we can use any comparison.
2800 C.ICmpType = SystemZICMP::Any;
2801 } else
2802 return;
2803
2804 // Make sure that the first operand is an i32 of the right extension type.
2805   ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2806                               ISD::SEXTLOAD :
2807                               ISD::ZEXTLOAD);
2808 if (C.Op0.getValueType() != MVT::i32 ||
2809 Load->getExtensionType() != ExtType) {
2810 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2811 Load->getBasePtr(), Load->getPointerInfo(),
2812 Load->getMemoryVT(), Load->getAlign(),
2813 Load->getMemOperand()->getFlags());
2814 // Update the chain uses.
2815 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2816 }
2817
2818 // Make sure that the second operand is an i32 with the right value.
2819 if (C.Op1.getValueType() != MVT::i32 ||
2820 Value != ConstOp1->getZExtValue())
2821 C.Op1 = DAG.getConstant((uint32_t)Value, DL, MVT::i32);
2822}
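
// For example, a signed test of a sign-extending i8 load against zero,
// "sextload_i8(ptr) < 0", takes the NumBits == 8 path above and is rewritten
// as the unsigned test "load > 127", which checks the sign bit of the byte
// and can be implemented with CLI.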
2823
2824// Return true if Op is either an unextended load, or a load suitable
2825// for integer register-memory comparisons of type ICmpType.
2826static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2827 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2828 if (Load) {
2829 // There are no instructions to compare a register with a memory byte.
2830 if (Load->getMemoryVT() == MVT::i8)
2831 return false;
2832 // Otherwise decide on extension type.
2833 switch (Load->getExtensionType()) {
2834 case ISD::NON_EXTLOAD:
2835 return true;
2836 case ISD::SEXTLOAD:
2837 return ICmpType != SystemZICMP::UnsignedOnly;
2838 case ISD::ZEXTLOAD:
2839 return ICmpType != SystemZICMP::SignedOnly;
2840 default:
2841 break;
2842 }
2843 }
2844 return false;
2845}
2846
2847// Return true if it is better to swap the operands of C.
2848static bool shouldSwapCmpOperands(const Comparison &C) {
2849 // Leave i128 and f128 comparisons alone, since they have no memory forms.
2850 if (C.Op0.getValueType() == MVT::i128)
2851 return false;
2852 if (C.Op0.getValueType() == MVT::f128)
2853 return false;
2854
2855 // Always keep a floating-point constant second, since comparisons with
2856 // zero can use LOAD TEST and comparisons with other constants make a
2857 // natural memory operand.
2858 if (isa<ConstantFPSDNode>(C.Op1))
2859 return false;
2860
2861 // Never swap comparisons with zero since there are many ways to optimize
2862 // those later.
2863 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2864 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2865 return false;
2866
2867 // Also keep natural memory operands second if the loaded value is
2868 // only used here. Several comparisons have memory forms.
2869 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2870 return false;
2871
2872 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2873 // In that case we generally prefer the memory to be second.
2874 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2875 // The only exceptions are when the second operand is a constant and
2876 // we can use things like CHHSI.
2877 if (!ConstOp1)
2878 return true;
2879 // The unsigned memory-immediate instructions can handle 16-bit
2880 // unsigned integers.
2881 if (C.ICmpType != SystemZICMP::SignedOnly &&
2882 isUInt<16>(ConstOp1->getZExtValue()))
2883 return false;
2884 // The signed memory-immediate instructions can handle 16-bit
2885 // signed integers.
2886 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2887 isInt<16>(ConstOp1->getSExtValue()))
2888 return false;
2889 return true;
2890 }
2891
2892 // Try to promote the use of CGFR and CLGFR.
2893 unsigned Opcode0 = C.Op0.getOpcode();
2894 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2895 return true;
2896 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2897 return true;
2898 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
2899 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2900 C.Op0.getConstantOperandVal(1) == 0xffffffff)
2901 return true;
2902
2903 return false;
2904}
2905
2906// Check whether C tests for equality between X and Y and whether X - Y
2907// or Y - X is also computed. In that case it's better to compare the
2908// result of the subtraction against zero.
2909 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2910                                  Comparison &C) {
2911 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2912 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2913 for (SDNode *N : C.Op0->users()) {
2914 if (N->getOpcode() == ISD::SUB &&
2915 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2916 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2917 // Disable the nsw and nuw flags: the backend needs to handle
2918 // overflow as well during comparison elimination.
2919 N->dropFlags(SDNodeFlags::NoWrap);
2920 C.Op0 = SDValue(N, 0);
2921 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2922 return;
2923 }
2924 }
2925 }
2926}
2927
2928// Check whether C compares a floating-point value with zero and if that
2929// floating-point value is also negated. In this case we can use the
2930// negation to set CC, so avoiding separate LOAD AND TEST and
2931// LOAD (NEGATIVE/COMPLEMENT) instructions.
2932static void adjustForFNeg(Comparison &C) {
2933 // This optimization is invalid for strict comparisons, since FNEG
2934 // does not raise any exceptions.
2935 if (C.Chain)
2936 return;
2937 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2938 if (C1 && C1->isZero()) {
2939 for (SDNode *N : C.Op0->users()) {
2940 if (N->getOpcode() == ISD::FNEG) {
2941 C.Op0 = SDValue(N, 0);
2942 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2943 return;
2944 }
2945 }
2946 }
2947}
2948
2949// Check whether C compares (shl X, 32) with 0 and whether X is
2950// also sign-extended. In that case it is better to test the result
2951// of the sign extension using LTGFR.
2952//
2953// This case is important because InstCombine transforms a comparison
2954// with (sext (trunc X)) into a comparison with (shl X, 32).
2955static void adjustForLTGFR(Comparison &C) {
2956 // Check for a comparison between (shl X, 32) and 0.
2957 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
2958 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
2959 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2960 if (C1 && C1->getZExtValue() == 32) {
2961 SDValue ShlOp0 = C.Op0.getOperand(0);
2962 // See whether X has any SIGN_EXTEND_INREG uses.
2963 for (SDNode *N : ShlOp0->users()) {
2964 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2965 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2966 C.Op0 = SDValue(N, 0);
2967 return;
2968 }
2969 }
2970 }
2971 }
2972}
2973
2974// If C compares the truncation of an extending load, try to compare
2975// the untruncated value instead. This exposes more opportunities to
2976// reuse CC.
2977static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2978 Comparison &C) {
2979 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2980 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2981 C.Op1.getOpcode() == ISD::Constant &&
2982 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
2983 C.Op1->getAsZExtVal() == 0) {
2984 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2985 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
2986 C.Op0.getValueSizeInBits().getFixedValue()) {
2987 unsigned Type = L->getExtensionType();
2988 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2989 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2990 C.Op0 = C.Op0.getOperand(0);
2991 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2992 }
2993 }
2994 }
2995}
2996
2997// Return true if shift operation N has an in-range constant shift value.
2998// Store it in ShiftVal if so.
2999static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
3000 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
3001 if (!Shift)
3002 return false;
3003
3004 uint64_t Amount = Shift->getZExtValue();
3005 if (Amount >= N.getValueSizeInBits())
3006 return false;
3007
3008 ShiftVal = Amount;
3009 return true;
3010}
3011
3012// Check whether an AND with Mask is suitable for a TEST UNDER MASK
3013// instruction and whether the CC value is descriptive enough to handle
3014// a comparison of type Opcode between the AND result and CmpVal.
3015// CCMask says which comparison result is being tested and BitSize is
3016// the number of bits in the operands. If TEST UNDER MASK can be used,
3017// return the corresponding CC mask, otherwise return 0.
3018static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
3019 uint64_t Mask, uint64_t CmpVal,
3020 unsigned ICmpType) {
3021 assert(Mask != 0 && "ANDs with zero should have been removed by now");
3022
3023 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
3024 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
3025 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
3026 return 0;
3027
3028 // Work out the masks for the lowest and highest bits.
3029   uint64_t High = uint64_t(1) << (63 - llvm::countl_zero(Mask));
3030   uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
3031
3032 // Signed ordered comparisons are effectively unsigned if the sign
3033 // bit is dropped.
3034 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
3035
3036 // Check for equality comparisons with 0, or the equivalent.
3037   if (CmpVal == 0) {
3038     if (CCMask == SystemZ::CCMASK_CMP_EQ)
3039       return SystemZ::CCMASK_TM_ALL_0;
3040     if (CCMask == SystemZ::CCMASK_CMP_NE)
3041       return SystemZ::CCMASK_TM_SOME_1;
3042   }
3043   if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
3044     if (CCMask == SystemZ::CCMASK_CMP_LT)
3045       return SystemZ::CCMASK_TM_ALL_0;
3046     if (CCMask == SystemZ::CCMASK_CMP_GE)
3047       return SystemZ::CCMASK_TM_SOME_1;
3048   }
3049   if (EffectivelyUnsigned && CmpVal < Low) {
3050     if (CCMask == SystemZ::CCMASK_CMP_LE)
3051       return SystemZ::CCMASK_TM_ALL_0;
3052     if (CCMask == SystemZ::CCMASK_CMP_GT)
3053       return SystemZ::CCMASK_TM_SOME_1;
3054   }
3055
3056 // Check for equality comparisons with the mask, or the equivalent.
3057   if (CmpVal == Mask) {
3058     if (CCMask == SystemZ::CCMASK_CMP_EQ)
3059       return SystemZ::CCMASK_TM_ALL_1;
3060     if (CCMask == SystemZ::CCMASK_CMP_NE)
3061       return SystemZ::CCMASK_TM_SOME_0;
3062   }
3063   if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
3064     if (CCMask == SystemZ::CCMASK_CMP_GT)
3065       return SystemZ::CCMASK_TM_ALL_1;
3066     if (CCMask == SystemZ::CCMASK_CMP_LE)
3067       return SystemZ::CCMASK_TM_SOME_0;
3068   }
3069   if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
3070     if (CCMask == SystemZ::CCMASK_CMP_GE)
3071       return SystemZ::CCMASK_TM_ALL_1;
3072     if (CCMask == SystemZ::CCMASK_CMP_LT)
3073       return SystemZ::CCMASK_TM_SOME_0;
3074   }
3075
3076 // Check for ordered comparisons with the top bit.
3077   if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
3078     if (CCMask == SystemZ::CCMASK_CMP_LE)
3079       return SystemZ::CCMASK_TM_MSB_0;
3080     if (CCMask == SystemZ::CCMASK_CMP_GT)
3081       return SystemZ::CCMASK_TM_MSB_1;
3082   }
3083   if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
3084     if (CCMask == SystemZ::CCMASK_CMP_LT)
3085       return SystemZ::CCMASK_TM_MSB_0;
3086     if (CCMask == SystemZ::CCMASK_CMP_GE)
3087       return SystemZ::CCMASK_TM_MSB_1;
3088   }
3089
3090 // If there are just two bits, we can do equality checks for Low and High
3091 // as well.
3092 if (Mask == Low + High) {
3093     if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
3094       return SystemZ::CCMASK_TM_MIXED_MSB_0;
3095     if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
3096       return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
3097     if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
3098       return SystemZ::CCMASK_TM_MIXED_MSB_1;
3099     if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
3100       return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
3101   }
3102
3103 // Looks like we've exhausted our options.
3104 return 0;
3105}
3106
3107// See whether C can be implemented as a TEST UNDER MASK instruction.
3108// Update the arguments with the TM version if so.
3109 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
3110                                    Comparison &C) {
3111 // Use VECTOR TEST UNDER MASK for i128 operations.
3112 if (C.Op0.getValueType() == MVT::i128) {
3113 // We can use VTM for EQ/NE comparisons of x & y against 0.
3114 if (C.Op0.getOpcode() == ISD::AND &&
3115 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3116 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
3117 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
3118 if (Mask && Mask->getAPIntValue() == 0) {
3119 C.Opcode = SystemZISD::VTM;
3120 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
3121 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
3122 C.CCValid = SystemZ::CCMASK_VCMP;
3123 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3124 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3125 else
3126 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3127 }
3128 }
3129 return;
3130 }
3131
3132 // Check that we have a comparison with a constant.
3133 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3134 if (!ConstOp1)
3135 return;
3136 uint64_t CmpVal = ConstOp1->getZExtValue();
3137
3138 // Check whether the nonconstant input is an AND with a constant mask.
3139 Comparison NewC(C);
3140 uint64_t MaskVal;
3141 ConstantSDNode *Mask = nullptr;
3142 if (C.Op0.getOpcode() == ISD::AND) {
3143 NewC.Op0 = C.Op0.getOperand(0);
3144 NewC.Op1 = C.Op0.getOperand(1);
3145 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
3146 if (!Mask)
3147 return;
3148 MaskVal = Mask->getZExtValue();
3149 } else {
3150 // There is no instruction to compare with a 64-bit immediate
3151 // so use TMHH instead if possible. We need an unsigned ordered
3152 // comparison with an i64 immediate.
3153 if (NewC.Op0.getValueType() != MVT::i64 ||
3154 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
3155 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
3156 NewC.ICmpType == SystemZICMP::SignedOnly)
3157 return;
3158 // Convert LE and GT comparisons into LT and GE.
3159 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
3160 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
3161 if (CmpVal == uint64_t(-1))
3162 return;
3163 CmpVal += 1;
3164 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
3165 }
3166     // If the low N bits of Op1 are zero then the low N bits of Op0 can
3167 // be masked off without changing the result.
3168 MaskVal = -(CmpVal & -CmpVal);
3169 NewC.ICmpType = SystemZICMP::UnsignedOnly;
3170 }
3171 if (!MaskVal)
3172 return;
3173
3174 // Check whether the combination of mask, comparison value and comparison
3175 // type are suitable.
3176 unsigned BitSize = NewC.Op0.getValueSizeInBits();
3177 unsigned NewCCMask, ShiftVal;
3178 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3179 NewC.Op0.getOpcode() == ISD::SHL &&
3180 isSimpleShift(NewC.Op0, ShiftVal) &&
3181 (MaskVal >> ShiftVal != 0) &&
3182 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
3183 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3184 MaskVal >> ShiftVal,
3185 CmpVal >> ShiftVal,
3186 SystemZICMP::Any))) {
3187 NewC.Op0 = NewC.Op0.getOperand(0);
3188 MaskVal >>= ShiftVal;
3189 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3190 NewC.Op0.getOpcode() == ISD::SRL &&
3191 isSimpleShift(NewC.Op0, ShiftVal) &&
3192 (MaskVal << ShiftVal != 0) &&
3193 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
3194 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3195 MaskVal << ShiftVal,
3196 CmpVal << ShiftVal,
3197                                                SystemZICMP::UnsignedOnly))) {
3198     NewC.Op0 = NewC.Op0.getOperand(0);
3199 MaskVal <<= ShiftVal;
3200 } else {
3201 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
3202 NewC.ICmpType);
3203 if (!NewCCMask)
3204 return;
3205 }
3206
3207 // Go ahead and make the change.
3208 C.Opcode = SystemZISD::TM;
3209 C.Op0 = NewC.Op0;
3210 if (Mask && Mask->getZExtValue() == MaskVal)
3211 C.Op1 = SDValue(Mask, 0);
3212 else
3213 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
3214 C.CCValid = SystemZ::CCMASK_TM;
3215 C.CCMask = NewCCMask;
3216}
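
// For example, the unsigned i64 test "x < (1 << 48)" has no 64-bit
// compare-immediate form; the else-branch above derives
// MaskVal = -(CmpVal & -CmpVal) = 0xFFFF000000000000, and
// getTestUnderMaskCond maps CCMASK_CMP_LT to CCMASK_TM_ALL_0, so the test
// can be emitted as a TMHH against the mask 0xFFFF.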
3217
3218// Implement i128 comparison in vector registers.
3219static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
3220 Comparison &C) {
3221 if (C.Opcode != SystemZISD::ICMP)
3222 return;
3223 if (C.Op0.getValueType() != MVT::i128)
3224 return;
3225
3226 // (In-)Equality comparisons can be implemented via VCEQGS.
3227 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3228 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3229 C.Opcode = SystemZISD::VICMPES;
3230 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
3231 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
3232 C.CCValid = SystemZ::CCMASK_VCMP;
3233 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3234 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3235 else
3236 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3237 return;
3238 }
3239
3240 // Normalize other comparisons to GT.
3241 bool Swap = false, Invert = false;
3242 switch (C.CCMask) {
3243 case SystemZ::CCMASK_CMP_GT: break;
3244 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
3245 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
3246 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
3247 default: llvm_unreachable("Invalid integer condition!");
3248 }
3249 if (Swap)
3250 std::swap(C.Op0, C.Op1);
3251
3252 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3253 C.Opcode = SystemZISD::UCMP128HI;
3254 else
3255 C.Opcode = SystemZISD::SCMP128HI;
3256 C.CCValid = SystemZ::CCMASK_ANY;
3257 C.CCMask = SystemZ::CCMASK_1;
3258
3259 if (Invert)
3260 C.CCMask ^= C.CCValid;
3261}
3262
3263// See whether the comparison argument contains a redundant AND
3264// and remove it if so. This sometimes happens due to the generic
3265// BRCOND expansion.
3266 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
3267                                   Comparison &C) {
3268 if (C.Op0.getOpcode() != ISD::AND)
3269 return;
3270 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3271 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3272 return;
3273 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3274 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3275 return;
3276
3277 C.Op0 = C.Op0.getOperand(0);
3278}
3279
3280// Return a Comparison that tests the condition-code result of intrinsic
3281// node Call against constant integer CC using comparison code Cond.
3282// Opcode is the opcode of the SystemZISD operation for the intrinsic
3283// and CCValid is the set of possible condition-code results.
3284static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3285 SDValue Call, unsigned CCValid, uint64_t CC,
3286                                   ISD::CondCode Cond) {
3287   Comparison C(Call, SDValue(), SDValue());
3288 C.Opcode = Opcode;
3289 C.CCValid = CCValid;
3290 if (Cond == ISD::SETEQ)
3291 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3292 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3293 else if (Cond == ISD::SETNE)
3294 // ...and the inverse of that.
3295 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3296 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3297 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3298 // always true for CC>3.
3299 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3300 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3301 // ...and the inverse of that.
3302 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3303 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3304 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3305 // always true for CC>3.
3306 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3307 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3308 // ...and the inverse of that.
3309 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3310 else
3311 llvm_unreachable("Unexpected integer comparison type");
3312 C.CCMask &= CCValid;
3313 return C;
3314}
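
// For example, testing "CC == 2" with Cond == SETEQ gives
// CCMask = 1 << (3 - 2) = SystemZ::CCMASK_2, so a branch is taken only for
// that one condition-code value (further restricted by CCValid).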
3315
3316 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3317static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3318 ISD::CondCode Cond, const SDLoc &DL,
3319 SDValue Chain = SDValue(),
3320 bool IsSignaling = false) {
3321 if (CmpOp1.getOpcode() == ISD::Constant) {
3322 assert(!Chain);
3323 unsigned Opcode, CCValid;
3324 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3325 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3326 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3327 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3328 CmpOp1->getAsZExtVal(), Cond);
3329 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3330 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3331 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3332 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3333 CmpOp1->getAsZExtVal(), Cond);
3334 }
3335 Comparison C(CmpOp0, CmpOp1, Chain);
3336 C.CCMask = CCMaskForCondCode(Cond);
3337 if (C.Op0.getValueType().isFloatingPoint()) {
3338 C.CCValid = SystemZ::CCMASK_FCMP;
3339 if (!C.Chain)
3340 C.Opcode = SystemZISD::FCMP;
3341 else if (!IsSignaling)
3342 C.Opcode = SystemZISD::STRICT_FCMP;
3343 else
3344 C.Opcode = SystemZISD::STRICT_FCMPS;
3345     adjustForFNeg(C);
3346   } else {
3347 assert(!C.Chain);
3348 C.CCValid = SystemZ::CCMASK_ICMP;
3349 C.Opcode = SystemZISD::ICMP;
3350 // Choose the type of comparison. Equality and inequality tests can
3351 // use either signed or unsigned comparisons. The choice also doesn't
3352 // matter if both sign bits are known to be clear. In those cases we
3353 // want to give the main isel code the freedom to choose whichever
3354 // form fits best.
3355 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3356 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3357 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3358 C.ICmpType = SystemZICMP::Any;
3359 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3360 C.ICmpType = SystemZICMP::UnsignedOnly;
3361 else
3362 C.ICmpType = SystemZICMP::SignedOnly;
3363 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3364 adjustForRedundantAnd(DAG, DL, C);
3365 adjustZeroCmp(DAG, DL, C);
3366 adjustSubwordCmp(DAG, DL, C);
3367 adjustForSubtraction(DAG, DL, C);
3368     adjustForLTGFR(C);
3369     adjustICmpTruncate(DAG, DL, C);
3370 }
3371
3372 if (shouldSwapCmpOperands(C)) {
3373 std::swap(C.Op0, C.Op1);
3374 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3375 }
3376
3377   adjustForTestUnderMask(DAG, DL, C);
3378   adjustICmp128(DAG, DL, C);
3379 return C;
3380}
3381
3382// Emit the comparison instruction described by C.
3383static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3384 if (!C.Op1.getNode()) {
3385 SDNode *Node;
3386 switch (C.Op0.getOpcode()) {
3387     case ISD::INTRINSIC_W_CHAIN:
3388       Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3389 return SDValue(Node, 0);
3390     case ISD::INTRINSIC_WO_CHAIN:
3391       Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3392 return SDValue(Node, Node->getNumValues() - 1);
3393 default:
3394 llvm_unreachable("Invalid comparison operands");
3395 }
3396 }
3397 if (C.Opcode == SystemZISD::ICMP)
3398 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3399 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3400 if (C.Opcode == SystemZISD::TM) {
3401 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3402                          bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
3403     return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3404 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3405 }
3406 if (C.Opcode == SystemZISD::VICMPES) {
3407 SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32);
3408 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3409 return SDValue(Val.getNode(), 1);
3410 }
3411 if (C.Chain) {
3412 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3413 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3414 }
3415 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3416}
3417
3418// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3419// 64 bits. Extend is the extension type to use. Store the high part
3420// in Hi and the low part in Lo.
3421static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3422 SDValue Op0, SDValue Op1, SDValue &Hi,
3423 SDValue &Lo) {
3424 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3425 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3426 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3427 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3428 DAG.getConstant(32, DL, MVT::i64));
3429 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3430 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3431}
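
// For example, with Extend == ISD::ZERO_EXTEND and both operands equal to
// 0xFFFFFFFF, the i64 product is 0xFFFFFFFE00000001, so Hi == 0xFFFFFFFE
// and Lo == 0x00000001.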
3432
3433// Lower a binary operation that produces two VT results, one in each
3434// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3435// and Opcode performs the GR128 operation. Store the even register result
3436// in Even and the odd register result in Odd.
3437static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3438 unsigned Opcode, SDValue Op0, SDValue Op1,
3439 SDValue &Even, SDValue &Odd) {
3440 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3441 bool Is32Bit = is32Bit(VT);
3442 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3443 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3444}
3445
3446// Return an i32 value that is 1 if the CC value produced by CCReg is
3447// in the mask CCMask and 0 otherwise. CC is known to have a value
3448// in CCValid, so other values can be ignored.
3449static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3450 unsigned CCValid, unsigned CCMask) {
3451 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3452 DAG.getConstant(0, DL, MVT::i32),
3453 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3454 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3455 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3456}
3457
3458 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3459// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3460// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3461// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3462// floating-point comparisons.
3463 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
3464 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
3465   switch (CC) {
3466 case ISD::SETOEQ:
3467 case ISD::SETEQ:
3468 switch (Mode) {
3469 case CmpMode::Int: return SystemZISD::VICMPE;
3470 case CmpMode::FP: return SystemZISD::VFCMPE;
3471 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3472 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3473 }
3474 llvm_unreachable("Bad mode");
3475
3476 case ISD::SETOGE:
3477 case ISD::SETGE:
3478 switch (Mode) {
3479 case CmpMode::Int: return 0;
3480 case CmpMode::FP: return SystemZISD::VFCMPHE;
3481 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3482 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3483 }
3484 llvm_unreachable("Bad mode");
3485
3486 case ISD::SETOGT:
3487 case ISD::SETGT:
3488 switch (Mode) {
3489 case CmpMode::Int: return SystemZISD::VICMPH;
3490 case CmpMode::FP: return SystemZISD::VFCMPH;
3491 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3492 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3493 }
3494 llvm_unreachable("Bad mode");
3495
3496 case ISD::SETUGT:
3497 switch (Mode) {
3498 case CmpMode::Int: return SystemZISD::VICMPHL;
3499 case CmpMode::FP: return 0;
3500 case CmpMode::StrictFP: return 0;
3501 case CmpMode::SignalingFP: return 0;
3502 }
3503 llvm_unreachable("Bad mode");
3504
3505 default:
3506 return 0;
3507 }
3508}
3509
3510// Return the SystemZISD vector comparison operation for CC or its inverse,
3511// or 0 if neither can be done directly. Indicate in Invert whether the
3512// result is for the inverse of CC. Mode is as above.
3513 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3514                                             bool &Invert) {
3515 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3516 Invert = false;
3517 return Opcode;
3518 }
3519
3520 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3521 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3522 Invert = true;
3523 return Opcode;
3524 }
3525
3526 return 0;
3527}
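
// For example, integer SETNE has no direct entry in getVectorComparison,
// but its inverse SETEQ maps to VICMPE, so this returns VICMPE with Invert
// set; the caller then XORs the comparison result with an all-ones mask.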
3528
3529// Return a v2f64 that contains the extended form of elements Start and Start+1
3530// of v4f32 value Op. If Chain is nonnull, return the strict form.
3531static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3532 SDValue Op, SDValue Chain) {
3533 int Mask[] = { Start, -1, Start + 1, -1 };
3534 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3535 if (Chain) {
3536 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3537 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3538 }
3539 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3540}
3541
3542// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3543// producing a result of type VT. If Chain is nonnull, return the strict form.
3544SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3545 const SDLoc &DL, EVT VT,
3546 SDValue CmpOp0,
3547 SDValue CmpOp1,
3548 SDValue Chain) const {
3549 // There is no hardware support for v4f32 (unless we have the vector
3550 // enhancements facility 1), so extend the vector into two v2f64s
3551 // and compare those.
3552 if (CmpOp0.getValueType() == MVT::v4f32 &&
3553 !Subtarget.hasVectorEnhancements1()) {
3554 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3555 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3556 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3557 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3558 if (Chain) {
3559 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3560 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3561 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3562 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3563 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3564 H1.getValue(1), L1.getValue(1),
3565 HRes.getValue(1), LRes.getValue(1) };
3566 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3567 SDValue Ops[2] = { Res, NewChain };
3568 return DAG.getMergeValues(Ops, DL);
3569 }
3570 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3571 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3572 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3573 }
3574 if (Chain) {
3575 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3576 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3577 }
3578 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3579}
3580
3581// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3582// an integer mask of type VT. If Chain is nonnull, we have a strict
3583// floating-point comparison. If in addition IsSignaling is true, we have
3584// a strict signaling floating-point comparison.
3585SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3586 const SDLoc &DL, EVT VT,
3587                                                 ISD::CondCode CC,
3588                                                 SDValue CmpOp0,
3589 SDValue CmpOp1,
3590 SDValue Chain,
3591 bool IsSignaling) const {
3592 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3593 assert (!Chain || IsFP);
3594 assert (!IsSignaling || Chain);
3595 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3596 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3597 bool Invert = false;
3598 SDValue Cmp;
3599 switch (CC) {
3600 // Handle tests for order using (or (ogt y x) (oge x y)).
3601 case ISD::SETUO:
3602 Invert = true;
3603 [[fallthrough]];
3604 case ISD::SETO: {
3605 assert(IsFP && "Unexpected integer comparison");
3606 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3607 DL, VT, CmpOp1, CmpOp0, Chain);
3608 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3609 DL, VT, CmpOp0, CmpOp1, Chain);
3610 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3611 if (Chain)
3612 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3613 LT.getValue(1), GE.getValue(1));
3614 break;
3615 }
3616
3617 // Handle <> tests using (or (ogt y x) (ogt x y)).
3618 case ISD::SETUEQ:
3619 Invert = true;
3620 [[fallthrough]];
3621 case ISD::SETONE: {
3622 assert(IsFP && "Unexpected integer comparison");
3623 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3624 DL, VT, CmpOp1, CmpOp0, Chain);
3625 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3626 DL, VT, CmpOp0, CmpOp1, Chain);
3627 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3628 if (Chain)
3629 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3630 LT.getValue(1), GT.getValue(1));
3631 break;
3632 }
3633
3634 // Otherwise a single comparison is enough. It doesn't really
3635 // matter whether we try the inversion or the swap first, since
3636 // there are no cases where both work.
3637 default:
3638 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3639 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3640 else {
3641 CC = ISD::getSetCCSwappedOperands(CC);
3642 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3643 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3644 else
3645 llvm_unreachable("Unhandled comparison");
3646 }
3647 if (Chain)
3648 Chain = Cmp.getValue(1);
3649 break;
3650 }
3651 if (Invert) {
3652 SDValue Mask =
3653 DAG.getSplatBuildVector(VT, DL, DAG.getAllOnesConstant(DL, MVT::i64));
3654 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3655 }
3656 if (Chain && Chain.getNode() != Cmp.getNode()) {
3657 SDValue Ops[2] = { Cmp, Chain };
3658 Cmp = DAG.getMergeValues(Ops, DL);
3659 }
3660 return Cmp;
3661}
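// A standalone sketch (not part of this file) of the ordered/unordered trick
// used above: under IEEE semantics, (ogt y x) || (oge x y) holds exactly when
// x and y are ordered, so the SETUO mask is just the inversion of the SETO
// mask. The helper names are made up, plain C++ for illustration.
static bool isOrderedSketch(double X, double Y) {
  return (Y > X) || (X >= Y); // false iff either operand is a NaN
}
static bool isUnorderedSketch(double X, double Y) {
  return !isOrderedSketch(X, Y); // e.g. true for (NaN, 1.0), false for (1.0, 2.0)
}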
3662
3663SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3664 SelectionDAG &DAG) const {
3665 SDValue CmpOp0 = Op.getOperand(0);
3666 SDValue CmpOp1 = Op.getOperand(1);
3667 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3668 SDLoc DL(Op);
3669 EVT VT = Op.getValueType();
3670 if (VT.isVector())
3671 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3672
3673 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3674 SDValue CCReg = emitCmp(DAG, DL, C);
3675 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3676}
3677
3678SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3679 SelectionDAG &DAG,
3680 bool IsSignaling) const {
3681 SDValue Chain = Op.getOperand(0);
3682 SDValue CmpOp0 = Op.getOperand(1);
3683 SDValue CmpOp1 = Op.getOperand(2);
3684 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3685 SDLoc DL(Op);
3686 EVT VT = Op.getNode()->getValueType(0);
3687 if (VT.isVector()) {
3688 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3689 Chain, IsSignaling);
3690 return Res.getValue(Op.getResNo());
3691 }
3692
3693 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3694 SDValue CCReg = emitCmp(DAG, DL, C);
3695 CCReg->setFlags(Op->getFlags());
3696 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3697 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3698 return DAG.getMergeValues(Ops, DL);
3699}
3700
3701SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3702 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3703 SDValue CmpOp0 = Op.getOperand(2);
3704 SDValue CmpOp1 = Op.getOperand(3);
3705 SDValue Dest = Op.getOperand(4);
3706 SDLoc DL(Op);
3707
3708 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3709 SDValue CCReg = emitCmp(DAG, DL, C);
3710 return DAG.getNode(
3711 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3712 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3713 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3714}
3715
3716// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3717// allowing Pos and Neg to be wider than CmpOp.
3718static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3719 return (Neg.getOpcode() == ISD::SUB &&
3720 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3721 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3722 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3723 Pos.getOperand(0) == CmpOp)));
3724}
3725
3726// Return the absolute or negative absolute of Op; IsNegative decides which.
3727static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3728 bool IsNegative) {
3729 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3730 if (IsNegative)
3731 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3732 DAG.getConstant(0, DL, Op.getValueType()), Op);
3733 return Op;
3734}
3735
3736SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3737 SelectionDAG &DAG) const {
3738 SDValue CmpOp0 = Op.getOperand(0);
3739 SDValue CmpOp1 = Op.getOperand(1);
3740 SDValue TrueOp = Op.getOperand(2);
3741 SDValue FalseOp = Op.getOperand(3);
3742 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3743 SDLoc DL(Op);
3744
3745 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3746
3747 // Check for absolute and negative-absolute selections, including those
3748 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3749 // This check supplements the one in DAGCombiner.
3750 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3751 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3752 C.Op1.getOpcode() == ISD::Constant &&
3753 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3754 C.Op1->getAsZExtVal() == 0) {
3755 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3756 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3757 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3758 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3759 }
3760
3761 SDValue CCReg = emitCmp(DAG, DL, C);
3762 SDValue Ops[] = {TrueOp, FalseOp,
3763 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3764 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3765
3766 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3767}
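// An illustrative-only restatement of the patterns matched above: a SELECT_CC
// comparing X against zero and choosing between X and 0 - X computes an
// absolute value or its negation. The function names are hypothetical.
static int64_t absPatternSketch(int64_t X)  { return X < 0 ? 0 - X : X; } // |X|, LPGR-style
static int64_t nabsPatternSketch(int64_t X) { return X > 0 ? 0 - X : X; } // -|X|, LNGR-style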
3768
3769SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3770 SelectionDAG &DAG) const {
3771 SDLoc DL(Node);
3772 const GlobalValue *GV = Node->getGlobal();
3773 int64_t Offset = Node->getOffset();
3774 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3775 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3776
3777 SDValue Result;
3778 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3779 if (isInt<32>(Offset)) {
3780 // Assign anchors at 1<<12 byte boundaries.
3781 uint64_t Anchor = Offset & ~uint64_t(0xfff);
3782 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3783 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3784
3785 // The offset can be folded into the address if it is aligned to a
3786 // halfword.
3787 Offset -= Anchor;
3788 if (Offset != 0 && (Offset & 1) == 0) {
3789 SDValue Full =
3790 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3791 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3792 Offset = 0;
3793 }
3794 } else {
3795 // Conservatively load a constant offset greater than 32 bits into a
3796 // register below.
3797 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3798 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3799 }
3800 } else if (Subtarget.isTargetELF()) {
3801 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3802 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3803 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3804 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3805 } else if (Subtarget.isTargetzOS()) {
3806 Result = getADAEntry(DAG, GV, DL, PtrVT);
3807 } else
3808 llvm_unreachable("Unexpected Subtarget");
3809
3810 // If there was a non-zero offset that we didn't fold, create an explicit
3811 // addition for it.
3812 if (Offset != 0)
3813 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3814 DAG.getSignedConstant(Offset, DL, PtrVT));
3815
3816 return Result;
3817}
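// A sketch of the anchor arithmetic above (hypothetical helper, plain C++):
// the anchor is the offset rounded down to a 4096-byte boundary, and the
// remainder can be folded into the PC-relative address only when it is
// halfword-aligned; otherwise an explicit add is emitted afterwards.
static void splitGlobalOffsetSketch(int64_t Offset, uint64_t &Anchor,
                                    int64_t &Remainder, bool &Foldable) {
  Anchor = Offset & ~uint64_t(0xfff);                 // 1<<12 byte granule
  Remainder = Offset - int64_t(Anchor);
  Foldable = Remainder != 0 && (Remainder & 1) == 0;  // even, i.e. halfword
}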
3818
3819SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3820 SelectionDAG &DAG,
3821 unsigned Opcode,
3822 SDValue GOTOffset) const {
3823 SDLoc DL(Node);
3824 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3825 SDValue Chain = DAG.getEntryNode();
3826 SDValue Glue;
3827
3828 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3829 CallingConv::GHC)
3830 report_fatal_error("In GHC calling convention TLS is not supported");
3831
3832 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3833 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3834 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3835 Glue = Chain.getValue(1);
3836 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3837 Glue = Chain.getValue(1);
3838
3839 // The first call operand is the chain and the second is the TLS symbol.
3840 SmallVector<SDValue, 8> Ops;
3841 Ops.push_back(Chain);
3842 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3843 Node->getValueType(0),
3844 0, 0));
3845
3846 // Add argument registers to the end of the list so that they are
3847 // known live into the call.
3848 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3849 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3850
3851 // Add a register mask operand representing the call-preserved registers.
3852 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3853 const uint32_t *Mask =
3854 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3855 assert(Mask && "Missing call preserved mask for calling convention");
3856 Ops.push_back(DAG.getRegisterMask(Mask));
3857
3858 // Glue the call to the argument copies.
3859 Ops.push_back(Glue);
3860
3861 // Emit the call.
3862 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3863 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3864 Glue = Chain.getValue(1);
3865
3866 // Copy the return value from %r2.
3867 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3868}
3869
3870SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3871 SelectionDAG &DAG) const {
3872 SDValue Chain = DAG.getEntryNode();
3873 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3874
3875 // The high part of the thread pointer is in access register 0.
3876 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3877 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3878
3879 // The low part of the thread pointer is in access register 1.
3880 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3881 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3882
3883 // Merge them into a single 64-bit address.
3884 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3885 DAG.getConstant(32, DL, PtrVT));
3886 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3887}
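// What the shift-and-OR above computes, as plain integer arithmetic (sketch
// only): access registers a0 and a1 hold the high and low 32-bit halves of
// the 64-bit thread pointer respectively.
static uint64_t threadPointerSketch(uint32_t A0, uint32_t A1) {
  return (uint64_t(A0) << 32) | A1; // SHL by 32, then OR in the low half
}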
3888
3889SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3890 SelectionDAG &DAG) const {
3891 if (DAG.getTarget().useEmulatedTLS())
3892 return LowerToTLSEmulatedModel(Node, DAG);
3893 SDLoc DL(Node);
3894 const GlobalValue *GV = Node->getGlobal();
3895 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3896 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3897
3898 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3899 CallingConv::GHC)
3900 report_fatal_error("In GHC calling convention TLS is not supported");
3901
3902 SDValue TP = lowerThreadPointer(DL, DAG);
3903
3904 // Get the offset of GA from the thread pointer, based on the TLS model.
3905 SDValue Offset;
3906 switch (model) {
3907 case TLSModel::GeneralDynamic: {
3908 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3909 SystemZConstantPoolValue *CPV =
3910 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3911
3912 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3913 Offset = DAG.getLoad(
3914 PtrVT, DL, DAG.getEntryNode(), Offset,
3915 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3916
3917 // Call __tls_get_offset to retrieve the offset.
3918 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3919 break;
3920 }
3921
3922 case TLSModel::LocalDynamic: {
3923 // Load the GOT offset of the module ID.
3924 SystemZConstantPoolValue *CPV =
3925 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3926
3927 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3928 Offset = DAG.getLoad(
3929 PtrVT, DL, DAG.getEntryNode(), Offset,
3930 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3931
3932 // Call __tls_get_offset to retrieve the module base offset.
3933 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3934
3935 // Note: The SystemZLDCleanupPass will remove redundant computations
3936 // of the module base offset. Count total number of local-dynamic
3937 // accesses to trigger execution of that pass.
3938 SystemZMachineFunctionInfo* MFI =
3939 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3940 MFI->incNumLocalDynamicTLSAccesses();
3941
3942 // Add the per-symbol offset.
3943 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3944
3945 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3946 DTPOffset = DAG.getLoad(
3947 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3948 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3949
3950 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3951 break;
3952 }
3953
3954 case TLSModel::InitialExec: {
3955 // Load the offset from the GOT.
3956 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3957 SystemZII::MO_INDNTPOFF);
3958 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3959 Offset =
3960 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3961 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3962 break;
3963 }
3964
3965 case TLSModel::LocalExec: {
3966 // Force the offset into the constant pool and load it from there.
3967 SystemZConstantPoolValue *CPV =
3968 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3969
3970 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3971 Offset = DAG.getLoad(
3972 PtrVT, DL, DAG.getEntryNode(), Offset,
3973 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3974 break;
3975 }
3976 }
3977
3978 // Add the base and offset together.
3979 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3980}
3981
3982SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3983 SelectionDAG &DAG) const {
3984 SDLoc DL(Node);
3985 const BlockAddress *BA = Node->getBlockAddress();
3986 int64_t Offset = Node->getOffset();
3987 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3988
3989 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3990 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3991 return Result;
3992}
3993
3994SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3995 SelectionDAG &DAG) const {
3996 SDLoc DL(JT);
3997 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3998 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3999
4000 // Use LARL to load the address of the table.
4001 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4002}
4003
4004SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
4005 SelectionDAG &DAG) const {
4006 SDLoc DL(CP);
4007 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4008
4009 SDValue Result;
4010 if (CP->isMachineConstantPoolEntry())
4011 Result =
4012 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
4013 else
4014 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
4015 CP->getOffset());
4016
4017 // Use LARL to load the address of the constant pool entry.
4018 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4019}
4020
4021SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
4022 SelectionDAG &DAG) const {
4023 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4024 MachineFunction &MF = DAG.getMachineFunction();
4025 MachineFrameInfo &MFI = MF.getFrameInfo();
4026 MFI.setFrameAddressIsTaken(true);
4027
4028 SDLoc DL(Op);
4029 unsigned Depth = Op.getConstantOperandVal(0);
4030 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4031
4032 // By definition, the frame address is the address of the back chain. (In
4033 // the case of packed stack without backchain, return the address where the
4034 // backchain would have been stored. This will either be an unused space or
4035 // contain a saved register).
4036 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
4037 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
4038
4039 if (Depth > 0) {
4040 // FIXME The frontend should detect this case.
4041 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4042 report_fatal_error("Unsupported stack frame traversal count");
4043
4044 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
4045 while (Depth--) {
4046 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
4047 MachinePointerInfo());
4048 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
4049 }
4050 }
4051
4052 return BackChain;
4053}
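// A conceptual model of the Depth loop above (illustrative, assumes a
// backchain is present): each frame's backchain slot holds the caller's
// stack pointer, so each level is one dependent load plus a fixed offset.
static char *walkBackchainSketch(char *Slot, unsigned Depth,
                                 size_t BackchainOffset) {
  while (Depth--) {
    char *CallerSP = *reinterpret_cast<char **>(Slot); // the load
    Slot = CallerSP + BackchainOffset;                 // the ISD::ADD
  }
  return Slot;
}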
4054
4055SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
4056 SelectionDAG &DAG) const {
4057 MachineFunction &MF = DAG.getMachineFunction();
4058 MachineFrameInfo &MFI = MF.getFrameInfo();
4059 MFI.setReturnAddressIsTaken(true);
4060
4061 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
4062 return SDValue();
4063
4064 SDLoc DL(Op);
4065 unsigned Depth = Op.getConstantOperandVal(0);
4066 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4067
4068 if (Depth > 0) {
4069 // FIXME The frontend should detect this case.
4070 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4071 report_fatal_error("Unsupported stack frame traversal count");
4072
4073 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
4074 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4075 int Offset = TFL->getReturnAddressOffset(MF);
4076 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
4077 DAG.getSignedConstant(Offset, DL, PtrVT));
4078 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
4079 MachinePointerInfo());
4080 }
4081
4082 // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an
4083 // implicit live-in.
4084 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
4085 Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(),
4086 &SystemZ::GR64BitRegClass);
4087 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
4088}
4089
4090SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
4091 SelectionDAG &DAG) const {
4092 SDLoc DL(Op);
4093 SDValue In = Op.getOperand(0);
4094 EVT InVT = In.getValueType();
4095 EVT ResVT = Op.getValueType();
4096
4097 // Convert loads directly. This is normally done by DAGCombiner,
4098 // but we need this case for bitcasts that are created during lowering
4099 // and which are then lowered themselves.
4100 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
4101 if (ISD::isNormalLoad(LoadN)) {
4102 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
4103 LoadN->getBasePtr(), LoadN->getMemOperand());
4104 // Update the chain uses.
4105 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
4106 return NewLoad;
4107 }
4108
4109 if (InVT == MVT::i32 && ResVT == MVT::f32) {
4110 SDValue In64;
4111 if (Subtarget.hasHighWord()) {
4112 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
4113 MVT::i64);
4114 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4115 MVT::i64, SDValue(U64, 0), In);
4116 } else {
4117 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
4118 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
4119 DAG.getConstant(32, DL, MVT::i64));
4120 }
4121 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
4122 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
4123 DL, MVT::f32, Out64);
4124 }
4125 if (InVT == MVT::f32 && ResVT == MVT::i32) {
4126 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
4127 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4128 MVT::f64, SDValue(U64, 0), In);
4129 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
4130 if (Subtarget.hasHighWord())
4131 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
4132 MVT::i32, Out64);
4133 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
4134 DAG.getConstant(32, DL, MVT::i64));
4135 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
4136 }
4137 llvm_unreachable("Unexpected bitcast combination");
4138}
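// The i32<->f32 dance above as plain bit movement (sketch; assumes <cstring>
// and <cstdint>): the 32-bit float occupies the high word of a 64-bit FP
// register, so the integer bits are shifted up before the 64-bit bitcast and
// shifted back down after the reverse bitcast.
static uint32_t f32BitsRoundTripSketch(uint32_t In32) {
  uint64_t In64 = uint64_t(In32) << 32; // the ISD::SHL by 32
  double D;
  std::memcpy(&D, &In64, sizeof D);     // i64 -> f64 bitcast
  uint64_t Out64;
  std::memcpy(&Out64, &D, sizeof D);    // f64 -> i64 bitcast
  return uint32_t(Out64 >> 32);         // recover the original bits
}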
4139
4140SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
4141 SelectionDAG &DAG) const {
4142
4143 if (Subtarget.isTargetXPLINK64())
4144 return lowerVASTART_XPLINK(Op, DAG);
4145 else
4146 return lowerVASTART_ELF(Op, DAG);
4147}
4148
4149SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
4150 SelectionDAG &DAG) const {
4151 MachineFunction &MF = DAG.getMachineFunction();
4152 SystemZMachineFunctionInfo *FuncInfo =
4153 MF.getInfo<SystemZMachineFunctionInfo>();
4154
4155 SDLoc DL(Op);
4156
4157 // vastart just stores the address of the VarArgsFrameIndex slot into the
4158 // memory location argument.
4159 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4160 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4161 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4162 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4163 MachinePointerInfo(SV));
4164}
4165
4166SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
4167 SelectionDAG &DAG) const {
4168 MachineFunction &MF = DAG.getMachineFunction();
4169 SystemZMachineFunctionInfo *FuncInfo =
4170 MF.getInfo<SystemZMachineFunctionInfo>();
4171 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4172
4173 SDValue Chain = Op.getOperand(0);
4174 SDValue Addr = Op.getOperand(1);
4175 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4176 SDLoc DL(Op);
4177
4178 // The initial values of each field.
4179 const unsigned NumFields = 4;
4180 SDValue Fields[NumFields] = {
4181 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
4182 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
4183 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
4184 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
4185 };
4186
4187 // Store each field into its respective slot.
4188 SDValue MemOps[NumFields];
4189 unsigned Offset = 0;
4190 for (unsigned I = 0; I < NumFields; ++I) {
4191 SDValue FieldAddr = Addr;
4192 if (Offset != 0)
4193 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
4194 DAG.getIntPtrConstant(Offset, DL));
4195 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
4196 MachinePointerInfo(SV, Offset));
4197 Offset += 8;
4198 }
4199 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4200}
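// The four 8-byte slots stored above correspond to the s390x ELF va_list
// layout, shown here as a struct for orientation (field names follow the
// usual ABI convention; nothing here is defined by this file):
struct VaListELFSketch {
  long __gpr;                // count of named GPR arguments consumed so far
  long __fpr;                // count of named FPR arguments consumed so far
  void *__overflow_arg_area; // next stack (overflow) argument
  void *__reg_save_area;     // saved argument registers
};
static_assert(sizeof(VaListELFSketch) == 32, "four 8-byte fields");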
4201
4202SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
4203 SelectionDAG &DAG) const {
4204 SDValue Chain = Op.getOperand(0);
4205 SDValue DstPtr = Op.getOperand(1);
4206 SDValue SrcPtr = Op.getOperand(2);
4207 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4208 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4209 SDLoc DL(Op);
4210
4211 uint32_t Sz =
4212 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
4213 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
4214 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
4215 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV),
4216 MachinePointerInfo(SrcSV));
4217}
4218
4219SDValue
4220SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
4221 SelectionDAG &DAG) const {
4222 if (Subtarget.isTargetXPLINK64())
4223 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
4224 else
4225 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
4226}
4227
4228SDValue
4229SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
4230 SelectionDAG &DAG) const {
4231 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4232 MachineFunction &MF = DAG.getMachineFunction();
4233 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4234 SDValue Chain = Op.getOperand(0);
4235 SDValue Size = Op.getOperand(1);
4236 SDValue Align = Op.getOperand(2);
4237 SDLoc DL(Op);
4238
4239 // If user has set the no alignment function attribute, ignore
4240 // alloca alignments.
4241 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4242
4243 uint64_t StackAlign = TFI->getStackAlignment();
4244 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4245 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4246
4247 SDValue NeededSpace = Size;
4248
4249 // Add extra space for alignment if needed.
4250 EVT PtrVT = getPointerTy(MF.getDataLayout());
4251 if (ExtraAlignSpace)
4252 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
4253 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4254
4255 bool IsSigned = false;
4256 bool DoesNotReturn = false;
4257 bool IsReturnValueUsed = false;
4258 EVT VT = Op.getValueType();
4259 SDValue AllocaCall =
4260 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4261 CallingConv::C, IsSigned, DL, DoesNotReturn,
4262 IsReturnValueUsed)
4263 .first;
4264
4265 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4266 // to end of call in order to ensure it isn't broken up from the call
4267 // sequence.
4268 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4269 Register SPReg = Regs.getStackPointerRegister();
4270 Chain = AllocaCall.getValue(1);
4271 SDValue Glue = AllocaCall.getValue(2);
4272 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4273 Chain = NewSPRegNode.getValue(1);
4274
4275 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4276 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4277 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4278
4279 // Dynamically realign if needed.
4280 if (ExtraAlignSpace) {
4281 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4282 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4283 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4284 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4285 }
4286
4287 SDValue Ops[2] = {Result, Chain};
4288 return DAG.getMergeValues(Ops, DL);
4289}
4290
4291SDValue
4292SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4293 SelectionDAG &DAG) const {
4294 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4295 MachineFunction &MF = DAG.getMachineFunction();
4296 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4297 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4298
4299 SDValue Chain = Op.getOperand(0);
4300 SDValue Size = Op.getOperand(1);
4301 SDValue Align = Op.getOperand(2);
4302 SDLoc DL(Op);
4303
4304 // If user has set the no alignment function attribute, ignore
4305 // alloca alignments.
4306 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4307
4308 uint64_t StackAlign = TFI->getStackAlignment();
4309 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4310 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4311
4312 Register SPReg = getStackPointerRegisterToSaveRestore();
4313 SDValue NeededSpace = Size;
4314
4315 // Get a reference to the stack pointer.
4316 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4317
4318 // If we need a backchain, save it now.
4319 SDValue Backchain;
4320 if (StoreBackchain)
4321 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4322 MachinePointerInfo());
4323
4324 // Add extra space for alignment if needed.
4325 if (ExtraAlignSpace)
4326 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4327 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4328
4329 // Get the new stack pointer value.
4330 SDValue NewSP;
4331 if (hasInlineStackProbe(MF)) {
4332 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4333 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4334 Chain = NewSP.getValue(1);
4335 }
4336 else {
4337 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4338 // Copy the new stack pointer back.
4339 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4340 }
4341
4342 // The allocated data lives above the 160 bytes allocated for the standard
4343 // frame, plus any outgoing stack arguments. We don't know how much that
4344 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4345 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4346 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4347
4348 // Dynamically realign if needed.
4349 if (RequiredAlign > StackAlign) {
4350 Result =
4351 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4352 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4353 Result =
4354 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4355 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4356 }
4357
4358 if (StoreBackchain)
4359 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4360 MachinePointerInfo());
4361
4362 SDValue Ops[2] = { Result, Chain };
4363 return DAG.getMergeValues(Ops, DL);
4364}
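// The over-allocate-then-round trick above in plain arithmetic (sketch;
// assumes Align is a power of two no smaller than the stack alignment and
// Extra == Align - stackAlign):
static uintptr_t dynAllocAlignedSketch(uintptr_t SP, uintptr_t Size,
                                       uintptr_t Align, uintptr_t Extra) {
  uintptr_t NewSP = SP - (Size + Extra); // grow downwards with Extra slack
  return (NewSP + Extra) & ~(Align - 1); // round up within the slack
}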
4365
4366SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4367 SDValue Op, SelectionDAG &DAG) const {
4368 SDLoc DL(Op);
4369
4370 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4371}
4372
4373SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4374 SelectionDAG &DAG) const {
4375 EVT VT = Op.getValueType();
4376 SDLoc DL(Op);
4377 SDValue Ops[2];
4378 if (is32Bit(VT))
4379 // Just do a normal 64-bit multiplication and extract the results.
4380 // We define this so that it can be used for constant division.
4381 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4382 Op.getOperand(1), Ops[1], Ops[0]);
4383 else if (Subtarget.hasMiscellaneousExtensions2())
4384 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4385 // the high result in the even register. ISD::SMUL_LOHI is defined to
4386 // return the low half first, so the results are in reverse order.
4387 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4388 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4389 else {
4390 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4391 //
4392 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4393 //
4394 // but using the fact that the upper halves are either all zeros
4395 // or all ones:
4396 //
4397 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4398 //
4399 // and grouping the right terms together since they are quicker than the
4400 // multiplication:
4401 //
4402 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
4403 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4404 SDValue LL = Op.getOperand(0);
4405 SDValue RL = Op.getOperand(1);
4406 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4407 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4408 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4409 // the high result in the even register. ISD::SMUL_LOHI is defined to
4410 // return the low half first, so the results are in reverse order.
4411 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4412 LL, RL, Ops[1], Ops[0]);
4413 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4414 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4415 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4416 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4417 }
4418 return DAG.getMergeValues(Ops, DL);
4419}
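// The sign-correction identity above, checked in plain C++ (sketch only;
// relies on the GCC/Clang unsigned __int128 extension). For 64-bit values,
// the signed high half equals the unsigned high half minus
// ((lh & rl) + (ll & rh)), where lh and rh are the operands arithmetically
// shifted right by 63 (all-ones masks for negative inputs).
static void smulLoHiSketch(int64_t L, int64_t R, uint64_t &Hi, uint64_t &Lo) {
  uint64_t LL = uint64_t(L), RL = uint64_t(R);
  unsigned __int128 P = (unsigned __int128)LL * RL; // the UMUL_LOHI step
  Lo = uint64_t(P);
  uint64_t LH = uint64_t(L >> 63), RH = uint64_t(R >> 63); // the SRA by 63
  Hi = uint64_t(P >> 64) - ((LH & RL) + (LL & RH));        // grouped terms
}
// e.g. smulLoHiSketch(-1, 1, Hi, Lo) yields Hi == ~0ULL and Lo == ~0ULL (-1).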
4420
4421SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4422 SelectionDAG &DAG) const {
4423 EVT VT = Op.getValueType();
4424 SDLoc DL(Op);
4425 SDValue Ops[2];
4426 if (is32Bit(VT))
4427 // Just do a normal 64-bit multiplication and extract the results.
4428 // We define this so that it can be used for constant division.
4429 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4430 Op.getOperand(1), Ops[1], Ops[0]);
4431 else
4432 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4433 // the high result in the even register. ISD::UMUL_LOHI is defined to
4434 // return the low half first, so the results are in reverse order.
4435 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4436 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4437 return DAG.getMergeValues(Ops, DL);
4438}
4439
4440SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4441 SelectionDAG &DAG) const {
4442 SDValue Op0 = Op.getOperand(0);
4443 SDValue Op1 = Op.getOperand(1);
4444 EVT VT = Op.getValueType();
4445 SDLoc DL(Op);
4446
4447 // We use DSGF for 32-bit division. This means the first operand must
4448 // always be 64-bit, and the second operand should be 32-bit whenever
4449 // that is possible, to improve performance.
4450 if (is32Bit(VT))
4451 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4452 else if (DAG.ComputeNumSignBits(Op1) > 32)
4453 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4454
4455 // DSG(F) returns the remainder in the even register and the
4456 // quotient in the odd register.
4457 SDValue Ops[2];
4458 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4459 return DAG.getMergeValues(Ops, DL);
4460}
4461
4462SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4463 SelectionDAG &DAG) const {
4464 EVT VT = Op.getValueType();
4465 SDLoc DL(Op);
4466
4467 // DL(G) returns the remainder in the even register and the
4468 // quotient in the odd register.
4469 SDValue Ops[2];
4470 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4471 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4472 return DAG.getMergeValues(Ops, DL);
4473}
4474
4475SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4476 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4477
4478 // Get the known-zero masks for each operand.
4479 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4480 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4481 DAG.computeKnownBits(Ops[1])};
4482
4483 // See if the upper 32 bits of one operand and the lower 32 bits of the
4484 // other are known zero. They are the low and high operands respectively.
4485 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4486 Known[1].Zero.getZExtValue() };
4487 unsigned High, Low;
4488 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4489 High = 1, Low = 0;
4490 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4491 High = 0, Low = 1;
4492 else
4493 return Op;
4494
4495 SDValue LowOp = Ops[Low];
4496 SDValue HighOp = Ops[High];
4497
4498 // If the high part is a constant, we're better off using IILH.
4499 if (HighOp.getOpcode() == ISD::Constant)
4500 return Op;
4501
4502 // If the low part is a constant that is outside the range of LHI,
4503 // then we're better off using IILF.
4504 if (LowOp.getOpcode() == ISD::Constant) {
4505 int64_t Value = int32_t(LowOp->getAsZExtVal());
4506 if (!isInt<16>(Value))
4507 return Op;
4508 }
4509
4510 // Check whether the high part is an AND that doesn't change the
4511 // high 32 bits and just masks out low bits. We can skip it if so.
4512 if (HighOp.getOpcode() == ISD::AND &&
4513 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4514 SDValue HighOp0 = HighOp.getOperand(0);
4515 uint64_t Mask = HighOp.getConstantOperandVal(1);
4516 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4517 HighOp = HighOp0;
4518 }
4519
4520 // Take advantage of the fact that all GR32 operations only change the
4521 // low 32 bits by truncating Low to an i32 and inserting it directly
4522 // using a subreg. The interesting cases are those where the truncation
4523 // can be folded.
4524 SDLoc DL(Op);
4525 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4526 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4527 MVT::i64, HighOp, Low32);
4528}
4529
4530// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4531SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4532 SelectionDAG &DAG) const {
4533 SDNode *N = Op.getNode();
4534 SDValue LHS = N->getOperand(0);
4535 SDValue RHS = N->getOperand(1);
4536 SDLoc DL(N);
4537
4538 if (N->getValueType(0) == MVT::i128) {
4539 unsigned BaseOp = 0;
4540 unsigned FlagOp = 0;
4541 bool IsBorrow = false;
4542 switch (Op.getOpcode()) {
4543 default: llvm_unreachable("Unknown instruction!");
4544 case ISD::UADDO:
4545 BaseOp = ISD::ADD;
4546 FlagOp = SystemZISD::VACC;
4547 break;
4548 case ISD::USUBO:
4549 BaseOp = ISD::SUB;
4550 FlagOp = SystemZISD::VSCBI;
4551 IsBorrow = true;
4552 break;
4553 }
4554 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4555 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4556 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4557 DAG.getValueType(MVT::i1));
4558 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4559 if (IsBorrow)
4560 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4561 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4562 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4563 }
4564
4565 unsigned BaseOp = 0;
4566 unsigned CCValid = 0;
4567 unsigned CCMask = 0;
4568
4569 switch (Op.getOpcode()) {
4570 default: llvm_unreachable("Unknown instruction!");
4571 case ISD::SADDO:
4572 BaseOp = SystemZISD::SADDO;
4573 CCValid = SystemZ::CCMASK_ARITH;
4574 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4575 break;
4576 case ISD::SSUBO:
4577 BaseOp = SystemZISD::SSUBO;
4578 CCValid = SystemZ::CCMASK_ARITH;
4579 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4580 break;
4581 case ISD::UADDO:
4582 BaseOp = SystemZISD::UADDO;
4583 CCValid = SystemZ::CCMASK_LOGICAL;
4584 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4585 break;
4586 case ISD::USUBO:
4587 BaseOp = SystemZISD::USUBO;
4588 CCValid = SystemZ::CCMASK_LOGICAL;
4589 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4590 break;
4591 }
4592
4593 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4594 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4595
4596 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4597 if (N->getValueType(1) == MVT::i1)
4598 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4599
4600 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4601}
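// The CC tests selected above reduce to these familiar predicates (sketch;
// __builtin_add_overflow is the GCC/Clang builtin):
static bool uaddCarrySketch(uint64_t A, uint64_t B) { return A + B < A; } // UADDO
static bool usubBorrowSketch(uint64_t A, uint64_t B) { return A < B; }    // USUBO
static bool saddOverflowSketch(int64_t A, int64_t B) {                    // SADDO
  int64_t S;
  return __builtin_add_overflow(A, B, &S);
}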
4602
4603static bool isAddCarryChain(SDValue Carry) {
4604 while (Carry.getOpcode() == ISD::UADDO_CARRY)
4605 Carry = Carry.getOperand(2);
4606 return Carry.getOpcode() == ISD::UADDO;
4607}
4608
4609static bool isSubBorrowChain(SDValue Carry) {
4610 while (Carry.getOpcode() == ISD::USUBO_CARRY)
4611 Carry = Carry.getOperand(2);
4612 return Carry.getOpcode() == ISD::USUBO;
4613}
4614
4615// Lower UADDO_CARRY/USUBO_CARRY nodes.
4616SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4617 SelectionDAG &DAG) const {
4618
4619 SDNode *N = Op.getNode();
4620 MVT VT = N->getSimpleValueType(0);
4621
4622 // Let legalize expand this if it isn't a legal type yet.
4623 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4624 return SDValue();
4625
4626 SDValue LHS = N->getOperand(0);
4627 SDValue RHS = N->getOperand(1);
4628 SDValue Carry = Op.getOperand(2);
4629 SDLoc DL(N);
4630
4631 if (VT == MVT::i128) {
4632 unsigned BaseOp = 0;
4633 unsigned FlagOp = 0;
4634 bool IsBorrow = false;
4635 switch (Op.getOpcode()) {
4636 default: llvm_unreachable("Unknown instruction!");
4637 case ISD::UADDO_CARRY:
4638 BaseOp = SystemZISD::VAC;
4639 FlagOp = SystemZISD::VACCC;
4640 break;
4641 case ISD::USUBO_CARRY:
4642 BaseOp = SystemZISD::VSBI;
4643 FlagOp = SystemZISD::VSBCBI;
4644 IsBorrow = true;
4645 break;
4646 }
4647 if (IsBorrow)
4648 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4649 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4650 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4651 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4652 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4653 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4654 DAG.getValueType(MVT::i1));
4655 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4656 if (IsBorrow)
4657 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4658 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4659 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4660 }
4661
4662 unsigned BaseOp = 0;
4663 unsigned CCValid = 0;
4664 unsigned CCMask = 0;
4665
4666 switch (Op.getOpcode()) {
4667 default: llvm_unreachable("Unknown instruction!");
4668 case ISD::UADDO_CARRY:
4669 if (!isAddCarryChain(Carry))
4670 return SDValue();
4671
4672 BaseOp = SystemZISD::ADDCARRY;
4673 CCValid = SystemZ::CCMASK_LOGICAL;
4674 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4675 break;
4676 case ISD::USUBO_CARRY:
4677 if (!isSubBorrowChain(Carry))
4678 return SDValue();
4679
4680 BaseOp = SystemZISD::SUBCARRY;
4681 CCValid = SystemZ::CCMASK_LOGICAL;
4682 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4683 break;
4684 }
4685
4686 // Set the condition code from the carry flag.
4687 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4688 DAG.getConstant(CCValid, DL, MVT::i32),
4689 DAG.getConstant(CCMask, DL, MVT::i32));
4690
4691 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4692 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4693
4694 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4695 if (N->getValueType(1) == MVT::i1)
4696 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4697
4698 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4699}
4700
4701SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4702 SelectionDAG &DAG) const {
4703 EVT VT = Op.getValueType();
4704 SDLoc DL(Op);
4705 Op = Op.getOperand(0);
4706
4707 if (VT.getScalarSizeInBits() == 128) {
4708 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4709 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4710 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4711 DAG.getConstant(0, DL, MVT::i64));
4712 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4713 return Op;
4714 }
4715
4716 // Handle vector types via VPOPCT.
4717 if (VT.isVector()) {
4718 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4719 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4720 switch (VT.getScalarSizeInBits()) {
4721 case 8:
4722 break;
4723 case 16: {
4724 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4725 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4726 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4727 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4728 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4729 break;
4730 }
4731 case 32: {
4732 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4733 DAG.getConstant(0, DL, MVT::i32));
4734 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4735 break;
4736 }
4737 case 64: {
4738 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4739 DAG.getConstant(0, DL, MVT::i32));
4740 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4741 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4742 break;
4743 }
4744 default:
4745 llvm_unreachable("Unexpected type");
4746 }
4747 return Op;
4748 }
4749
4750 // Get the known-zero mask for the operand.
4751 KnownBits Known = DAG.computeKnownBits(Op);
4752 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
4753 if (NumSignificantBits == 0)
4754 return DAG.getConstant(0, DL, VT);
4755
4756 // Skip known-zero high parts of the operand.
4757 int64_t OrigBitSize = VT.getSizeInBits();
4758 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
4759 BitSize = std::min(BitSize, OrigBitSize);
4760
4761 // The POPCNT instruction counts the number of bits in each byte.
4762 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
4763 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
4764 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4765
4766 // Add up per-byte counts in a binary tree. All bits of Op at
4767 // position larger than BitSize remain zero throughout.
4768 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
4769 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
4770 if (BitSize != OrigBitSize)
4771 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
4772 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
4773 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4774 }
4775
4776 // Extract overall result from high byte.
4777 if (BitSize > 8)
4778 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4779 DAG.getConstant(BitSize - 8, DL, VT));
4780
4781 return Op;
4782}
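// The byte-tree reduction above in plain C++ (sketch; __builtin_popcountll
// stands in for the per-byte POPCNT result): the shift-adds accumulate the
// byte counts toward the top byte, which ends up holding the full count.
static unsigned ctpop64Sketch(uint64_t X) {
  uint64_t V = 0;
  for (unsigned I = 0; I < 64; I += 8)  // per-byte counts, as POPCNT yields
    V |= uint64_t(__builtin_popcountll((X >> I) & 0xff)) << I;
  for (unsigned I = 32; I >= 8; I /= 2) // the binary tree of shift-adds
    V += V << I;
  return unsigned(V >> 56);             // extract from the high byte
}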
4783
4784SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4785 SelectionDAG &DAG) const {
4786 SDLoc DL(Op);
4787 AtomicOrdering FenceOrdering =
4788 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4789 SyncScope::ID FenceSSID =
4790 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4791
4792 // The only fence that needs an instruction is a sequentially-consistent
4793 // cross-thread fence.
4794 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4795 FenceSSID == SyncScope::System) {
4796 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
4797 Op.getOperand(0)),
4798 0);
4799 }
4800
4801 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4802 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4803}
4804
4805SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
4806 SelectionDAG &DAG) const {
4807 auto *Node = cast<AtomicSDNode>(Op.getNode());
4808 assert(
4809 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
4810 "Only custom lowering i128 or f128.");
4811 // Use same code to handle both legal and non-legal i128 types.
4812 SmallVector<SDValue, 2> Results;
4813 LowerOperationWrapper(Node, Results, DAG);
4814 return DAG.getMergeValues(Results, SDLoc(Op));
4815}
4816
4817// Prepare for a Compare And Swap for a subword operation. This needs to be
4818// done in memory with 4 bytes at natural alignment.
4819static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
4820 SDValue &AlignedAddr, SDValue &BitShift,
4821 SDValue &NegBitShift) {
4822 EVT PtrVT = Addr.getValueType();
4823 EVT WideVT = MVT::i32;
4824
4825 // Get the address of the containing word.
4826 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4827 DAG.getSignedConstant(-4, DL, PtrVT));
4828
4829 // Get the number of bits that the word must be rotated left in order
4830 // to bring the field to the top bits of a GR32.
4831 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4832 DAG.getConstant(3, DL, PtrVT));
4833 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4834
4835 // Get the complementing shift amount, for rotating a field in the top
4836 // bits back to its proper position.
4837 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4838 DAG.getConstant(0, DL, WideVT), BitShift);
4839
4840}
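// The same computation in plain arithmetic (sketch; rotate amounts are taken
// modulo 32, and SystemZ is big-endian, so a larger byte offset means the
// field sits further from the top of the containing word):
static void csShiftsSketch(uint64_t Addr, uint64_t &Aligned,
                           unsigned &BitShift, unsigned &NegBitShift) {
  Aligned = Addr & ~uint64_t(3);       // containing 4-byte word
  BitShift = unsigned(Addr << 3) & 31; // rotate-left amount to the top bits
  NegBitShift = (32 - BitShift) & 31;  // complementing rotate amount
}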
4841
4842// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
4843// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
4844SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
4845 SelectionDAG &DAG,
4846 unsigned Opcode) const {
4847 auto *Node = cast<AtomicSDNode>(Op.getNode());
4848
4849 // 32-bit operations need no special handling.
4850 EVT NarrowVT = Node->getMemoryVT();
4851 EVT WideVT = MVT::i32;
4852 if (NarrowVT == WideVT)
4853 return Op;
4854
4855 int64_t BitSize = NarrowVT.getSizeInBits();
4856 SDValue ChainIn = Node->getChain();
4857 SDValue Addr = Node->getBasePtr();
4858 SDValue Src2 = Node->getVal();
4859 MachineMemOperand *MMO = Node->getMemOperand();
4860 SDLoc DL(Node);
4861
4862 // Convert atomic subtracts of constants into additions.
4863 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4864 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4865 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
4866 Src2 = DAG.getSignedConstant(-Const->getSExtValue(), DL,
4867 Src2.getValueType());
4868 }
4869
4870 SDValue AlignedAddr, BitShift, NegBitShift;
4871 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4872
4873 // Extend the source operand to 32 bits and prepare it for the inner loop.
4874 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
4875 // operations require the source to be shifted in advance. (This shift
4876 // can be folded if the source is constant.) For AND and NAND, the lower
4877 // bits must be set, while for other opcodes they should be left clear.
4878 if (Opcode != SystemZISD::ATOMIC_SWAPW)
4879 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
4880 DAG.getConstant(32 - BitSize, DL, WideVT));
4881 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
4882 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
4883 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
4884 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
4885
4886 // Construct the ATOMIC_LOADW_* node.
4887 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
4888 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4889 DAG.getConstant(BitSize, DL, WideVT) };
4890 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4891 NarrowVT, MMO);
4892
4893 // Rotate the result of the final CS so that the field is in the lower
4894 // bits of a GR32, then truncate it.
4895 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
4896 DAG.getConstant(BitSize, DL, WideVT));
4897 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
4898
4899 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
4900 return DAG.getMergeValues(RetOps, DL);
4901}
4902
4903// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
4904// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
4905SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
4906 SelectionDAG &DAG) const {
4907 auto *Node = cast<AtomicSDNode>(Op.getNode());
4908 EVT MemVT = Node->getMemoryVT();
4909 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
4910 // A full-width operation: negate and use LAA(G).
4911 assert(Op.getValueType() == MemVT && "Mismatched VTs");
4912 assert(Subtarget.hasInterlockedAccess1() &&
4913 "Should have been expanded by AtomicExpand pass.");
4914 SDValue Src2 = Node->getVal();
4915 SDLoc DL(Src2);
4916 SDValue NegSrc2 =
4917 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
4918 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
4919 Node->getChain(), Node->getBasePtr(), NegSrc2,
4920 Node->getMemOperand());
4921 }
4922
4923 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
4924}
4925
4926// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
4927SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
4928 SelectionDAG &DAG) const {
4929 auto *Node = cast<AtomicSDNode>(Op.getNode());
4930 SDValue ChainIn = Node->getOperand(0);
4931 SDValue Addr = Node->getOperand(1);
4932 SDValue CmpVal = Node->getOperand(2);
4933 SDValue SwapVal = Node->getOperand(3);
4934 MachineMemOperand *MMO = Node->getMemOperand();
4935 SDLoc DL(Node);
4936
4937 if (Node->getMemoryVT() == MVT::i128) {
4938 // Use same code to handle both legal and non-legal i128 types.
4939 SmallVector<SDValue, 3> Results;
4940 LowerOperationWrapper(Node, Results, DAG);
4941 return DAG.getMergeValues(Results, DL);
4942 }
4943
4944 // We have native support for 32-bit and 64-bit compare and swap, but we
4945 // still need to expand extracting the "success" result from the CC.
4946 EVT NarrowVT = Node->getMemoryVT();
4947 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4948 if (NarrowVT == WideVT) {
4949 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4950 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
4951 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
4952 DL, Tys, Ops, NarrowVT, MMO);
4953 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4954 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
4955
4956 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4957 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4958 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4959 return SDValue();
4960 }
4961
4962 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4963 // via a fullword ATOMIC_CMP_SWAPW operation.
4964 int64_t BitSize = NarrowVT.getSizeInBits();
4965
4966 SDValue AlignedAddr, BitShift, NegBitShift;
4967 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4968
4969 // Construct the ATOMIC_CMP_SWAPW node.
4970 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4971 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
4972 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
4973 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
4974 VTList, Ops, NarrowVT, MMO);
4975 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4976 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
4977
4978 // emitAtomicCmpSwapW() will zero extend the result (original value).
4979 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
4980 DAG.getValueType(NarrowVT));
4981 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
4982 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4983 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4984 return SDValue();
4985}
4986
4987MachineMemOperand::Flags
4988SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
4989 // Because of how we convert atomic_load and atomic_store to normal loads and
4990 // stores in the DAG, we need to ensure that the MMOs are marked volatile
4991 // since DAGCombine hasn't been updated to account for atomic, but non
4992 // volatile loads. (See D57601)
4993 if (auto *SI = dyn_cast<StoreInst>(&I))
4994 if (SI->isAtomic())
4995 return MachineMemOperand::MOVolatile;
4996 if (auto *LI = dyn_cast<LoadInst>(&I))
4997 if (LI->isAtomic())
4998 return MachineMemOperand::MOVolatile;
4999 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
5000 if (AI->isAtomic())
5001 return MachineMemOperand::MOVolatile;
5002 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
5003 if (AI->isAtomic())
5004 return MachineMemOperand::MOVolatile;
5005 return MachineMemOperand::MONone;
5006}
5007
5008SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
5009 SelectionDAG &DAG) const {
5010 MachineFunction &MF = DAG.getMachineFunction();
5011 auto *Regs = Subtarget.getSpecialRegisters();
5012 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5013 report_fatal_error("Variable-sized stack allocations are not supported "
5014 "in GHC calling convention");
5015 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
5016 Regs->getStackPointerRegister(), Op.getValueType());
5017}
5018
5019SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
5020 SelectionDAG &DAG) const {
5021 MachineFunction &MF = DAG.getMachineFunction();
5022 auto *Regs = Subtarget.getSpecialRegisters();
5023 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
5024
5025 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5026 report_fatal_error("Variable-sized stack allocations are not supported "
5027 "in GHC calling convention");
5028
5029 SDValue Chain = Op.getOperand(0);
5030 SDValue NewSP = Op.getOperand(1);
5031 SDValue Backchain;
5032 SDLoc DL(Op);
5033
5034 if (StoreBackchain) {
5035 SDValue OldSP = DAG.getCopyFromReg(
5036 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
5037 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
5038 MachinePointerInfo());
5039 }
5040
5041 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
5042
5043 if (StoreBackchain)
5044 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
5045 MachinePointerInfo());
5046
5047 return Chain;
5048}
5049
5050SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
5051 SelectionDAG &DAG) const {
5052 bool IsData = Op.getConstantOperandVal(4);
5053 if (!IsData)
5054 // Just preserve the chain.
5055 return Op.getOperand(0);
5056
5057 SDLoc DL(Op);
5058 bool IsWrite = Op.getConstantOperandVal(2);
5059 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
5060 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
5061 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
5062 Op.getOperand(1)};
5063 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
5064 Node->getVTList(), Ops,
5065 Node->getMemoryVT(), Node->getMemOperand());
5066}
5067
5068// Convert condition code in CCReg to an i32 value.
5069static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
5070 SDLoc DL(CCReg);
5071 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
5072 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
5073 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
5074}
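// What the SRL extracts (sketch): IPM deposits the 2-bit condition code at
// bit position 28 of its 32-bit result (SystemZ::IPM_CC == 28), with the
// bits above it zero, so the shift alone leaves CC as an integer 0..3.
static unsigned ccFromIPMSketch(uint32_t IPMValue) {
  return (IPMValue >> 28) & 3; // the & 3 is only for clarity in this model
}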
5075
5076SDValue
5077SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
5078 SelectionDAG &DAG) const {
5079 unsigned Opcode, CCValid;
5080 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
5081 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
5082 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
5083 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
5084 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
5085 return SDValue();
5086 }
5087
5088 return SDValue();
5089}
5090
5091SDValue
5092SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
5093 SelectionDAG &DAG) const {
5094 unsigned Opcode, CCValid;
5095 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
5096 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
5097 if (Op->getNumValues() == 1)
5098 return getCCResult(DAG, SDValue(Node, 0));
5099 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
5100 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
5101 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
5102 }
5103
5104 unsigned Id = Op.getConstantOperandVal(0);
5105 switch (Id) {
5106 case Intrinsic::thread_pointer:
5107 return lowerThreadPointer(SDLoc(Op), DAG);
5108
5109 case Intrinsic::s390_vpdi:
5110 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
5111 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5112
5113 case Intrinsic::s390_vperm:
5114 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
5115 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5116
5117 case Intrinsic::s390_vuphb:
5118 case Intrinsic::s390_vuphh:
5119 case Intrinsic::s390_vuphf:
5120 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
5121 Op.getOperand(1));
5122
5123 case Intrinsic::s390_vuplhb:
5124 case Intrinsic::s390_vuplhh:
5125 case Intrinsic::s390_vuplhf:
5126 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
5127 Op.getOperand(1));
5128
5129 case Intrinsic::s390_vuplb:
5130 case Intrinsic::s390_vuplhw:
5131 case Intrinsic::s390_vuplf:
5132 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
5133 Op.getOperand(1));
5134
5135 case Intrinsic::s390_vupllb:
5136 case Intrinsic::s390_vupllh:
5137 case Intrinsic::s390_vupllf:
5138 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
5139 Op.getOperand(1));
5140
5141 case Intrinsic::s390_vsumb:
5142 case Intrinsic::s390_vsumh:
5143 case Intrinsic::s390_vsumgh:
5144 case Intrinsic::s390_vsumgf:
5145 case Intrinsic::s390_vsumqf:
5146 case Intrinsic::s390_vsumqg:
5147 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
5148 Op.getOperand(1), Op.getOperand(2));
5149
5150 case Intrinsic::s390_vaq:
5151 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5152 Op.getOperand(1), Op.getOperand(2));
5153 case Intrinsic::s390_vaccb:
5154 case Intrinsic::s390_vacch:
5155 case Intrinsic::s390_vaccf:
5156 case Intrinsic::s390_vaccg:
5157 case Intrinsic::s390_vaccq:
5158 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
5159 Op.getOperand(1), Op.getOperand(2));
5160 case Intrinsic::s390_vacq:
5161 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
5162 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5163 case Intrinsic::s390_vacccq:
5164 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
5165 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5166
5167 case Intrinsic::s390_vsq:
5168 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
5169 Op.getOperand(1), Op.getOperand(2));
5170 case Intrinsic::s390_vscbib:
5171 case Intrinsic::s390_vscbih:
5172 case Intrinsic::s390_vscbif:
5173 case Intrinsic::s390_vscbig:
5174 case Intrinsic::s390_vscbiq:
5175 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
5176 Op.getOperand(1), Op.getOperand(2));
5177 case Intrinsic::s390_vsbiq:
5178 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
5179 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5180 case Intrinsic::s390_vsbcbiq:
5181 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
5182 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5183 }
5184
5185 return SDValue();
5186}
5187
5188namespace {
5189// Says that SystemZISD operation Opcode can be used to perform the equivalent
5190// of a VPERM with permute vector Bytes. If Opcode takes three operands,
5191// Operand is the constant third operand, otherwise it is the number of
5192// bytes in each element of the result.
5193struct Permute {
5194 unsigned Opcode;
5195 unsigned Operand;
5196 unsigned char Bytes[SystemZ::VectorBytes];
5197};
5198}
5199
5200static const Permute PermuteForms[] = {
5201 // VMRHG
5202 { SystemZISD::MERGE_HIGH, 8,
5203 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
5204 // VMRHF
5205 { SystemZISD::MERGE_HIGH, 4,
5206 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
5207 // VMRHH
5208 { SystemZISD::MERGE_HIGH, 2,
5209 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
5210 // VMRHB
5211 { SystemZISD::MERGE_HIGH, 1,
5212 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
5213 // VMRLG
5214 { SystemZISD::MERGE_LOW, 8,
5215 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
5216 // VMRLF
5217 { SystemZISD::MERGE_LOW, 4,
5218 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
5219 // VMRLH
5220 { SystemZISD::MERGE_LOW, 2,
5221 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
5222 // VMRLB
5223 { SystemZISD::MERGE_LOW, 1,
5224 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
5225 // VPKG
5226 { SystemZISD::PACK, 4,
5227 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
5228 // VPKF
5229 { SystemZISD::PACK, 2,
5230 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
5231 // VPKH
5232 { SystemZISD::PACK, 1,
5233 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
5234 // VPDI V1, V2, 4 (low half of V1, high half of V2)
5235 { SystemZISD::PERMUTE_DWORDS, 4,
5236 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
5237 // VPDI V1, V2, 1 (high half of V1, low half of V2)
5238 { SystemZISD::PERMUTE_DWORDS, 1,
5239 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
5240};
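// How to read these tables (editorial note): each mask entry indexes the
// 32-byte concatenation of the two vector inputs, so values 0..15 select
// bytes of operand 0 and values 16..31 select bytes of operand 1. The VMRHB
// mask { 0, 16, 1, 17, ... }, for example, interleaves the high halves of
// the two inputs byte by byte. A minimal sketch of applying such a mask to
// plain byte arrays, assuming nothing beyond <cstdint>:
//
//   #include <cstdint>
//   // Out[I] is byte Mask[I] of the 32-byte concatenation of A and B.
//   void applyByteMask(const uint8_t (&Mask)[16], const uint8_t (&A)[16],
//                      const uint8_t (&B)[16], uint8_t (&Out)[16]) {
//     for (unsigned I = 0; I < 16; ++I)
//       Out[I] = Mask[I] < 16 ? A[Mask[I]] : B[Mask[I] - 16];
//   }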
5241
5242// Called after matching a vector shuffle against a particular pattern.
5243// Both the original shuffle and the pattern have two vector operands.
5244// OpNos[0] is the operand of the original shuffle that should be used for
5245// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
5246// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
5247// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
5248// for operands 0 and 1 of the pattern.
5249static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
5250 if (OpNos[0] < 0) {
5251 if (OpNos[1] < 0)
5252 return false;
5253 OpNo0 = OpNo1 = OpNos[1];
5254 } else if (OpNos[1] < 0) {
5255 OpNo0 = OpNo1 = OpNos[0];
5256 } else {
5257 OpNo0 = OpNos[0];
5258 OpNo1 = OpNos[1];
5259 }
5260 return true;
5261}
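// A worked example of the resolution rules (editorial sketch): if matching
// constrained only the second model operand, both outputs resolve to it and
// the pattern is applied with that shuffle operand duplicated:
//
//   int OpNos[] = { -1, 0 };
//   unsigned OpNo0, OpNo1;
//   bool OK = chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
//   // OK == true, OpNo0 == 0, OpNo1 == 0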
5262
5263// Bytes is a VPERM-like permute vector, except that -1 is used for
5264// undefined bytes. Return true if the VPERM can be implemented using P.
5265// When returning true set OpNo0 to the VPERM operand that should be
5266// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5267//
5268// For example, if swapping the VPERM operands allows P to match, OpNo0
5269// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5270// operand, but rewriting it to use two duplicated operands allows it to
5271// match P, then OpNo0 and OpNo1 will be the same.
5272static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5273 unsigned &OpNo0, unsigned &OpNo1) {
5274 int OpNos[] = { -1, -1 };
5275 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5276 int Elt = Bytes[I];
5277 if (Elt >= 0) {
5278 // Make sure that the two permute vectors use the same suboperand
5279 // byte number. Only the operand numbers (the high bits) are
5280 // allowed to differ.
5281 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5282 return false;
5283 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5284 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5285 // Make sure that the operand mappings are consistent with previous
5286 // elements.
5287 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5288 return false;
5289 OpNos[ModelOpNo] = RealOpNo;
5290 }
5291 }
5292 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5293}
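// Illustration (editorial): with P as the VMRHG form above, the byte vector
// { 16, 17, ..., 23, 0, 1, ..., 7 } matches with the operands swapped. For
// I < 8, P.Bytes[I] == I names model operand 0 while Bytes[I] == I + 16
// names shuffle operand 1, and vice versa for I >= 8; the XOR test passes
// because only the high, operand-number bits differ. The result is
// OpNo0 == 1 and OpNo1 == 0.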
5294
5295// As above, but search for a matching permute.
5296static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5297 unsigned &OpNo0, unsigned &OpNo1) {
5298 for (auto &P : PermuteForms)
5299 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5300 return &P;
5301 return nullptr;
5302}
5303
5304// Bytes is a VPERM-like permute vector, except that -1 is used for
5305// undefined bytes. This permute is an operand of an outer permute.
5306// See whether redistributing the -1 bytes gives a shuffle that can be
5307// implemented using P. If so, set Transform to a VPERM-like permute vector
5308// that, when applied to the result of P, gives the original permute in Bytes.
5309static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5310 const Permute &P,
5311 SmallVectorImpl<int> &Transform) {
5312 unsigned To = 0;
5313 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5314 int Elt = Bytes[From];
5315 if (Elt < 0)
5316 // Byte number From of the result is undefined.
5317 Transform[From] = -1;
5318 else {
5319 while (P.Bytes[To] != Elt) {
5320 To += 1;
5321 if (To == SystemZ::VectorBytes)
5322 return false;
5323 }
5324 Transform[From] = To;
5325 }
5326 }
5327 return true;
5328}
5329
5330// As above, but search for a matching permute.
5331static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5332 SmallVectorImpl<int> &Transform) {
5333 for (auto &P : PermuteForms)
5334 if (matchDoublePermute(Bytes, P, Transform))
5335 return &P;
5336 return nullptr;
5337}
5338
5339// Convert the mask of the given shuffle op into a byte-level mask,
5340// as if it had type vNi8.
5341static bool getVPermMask(SDValue ShuffleOp,
5342 SmallVectorImpl<int> &Bytes) {
5343 EVT VT = ShuffleOp.getValueType();
5344 unsigned NumElements = VT.getVectorNumElements();
5345 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5346
5347 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5348 Bytes.resize(NumElements * BytesPerElement, -1);
5349 for (unsigned I = 0; I < NumElements; ++I) {
5350 int Index = VSN->getMaskElt(I);
5351 if (Index >= 0)
5352 for (unsigned J = 0; J < BytesPerElement; ++J)
5353 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5354 }
5355 return true;
5356 }
5357 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5358 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5359 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5360 Bytes.resize(NumElements * BytesPerElement, -1);
5361 for (unsigned I = 0; I < NumElements; ++I)
5362 for (unsigned J = 0; J < BytesPerElement; ++J)
5363 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5364 return true;
5365 }
5366 return false;
5367}
5368
5369// Bytes is a VPERM-like permute vector, except that -1 is used for
5370// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5371// the result come from a contiguous sequence of bytes from one input.
5372// Set Base to the selector for the first byte if so.
5373static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5374 unsigned BytesPerElement, int &Base) {
5375 Base = -1;
5376 for (unsigned I = 0; I < BytesPerElement; ++I) {
5377 if (Bytes[Start + I] >= 0) {
5378 unsigned Elem = Bytes[Start + I];
5379 if (Base < 0) {
5380 Base = Elem - I;
5381 // Make sure the bytes would come from one input operand.
5382 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5383 return false;
5384 } else if (unsigned(Base) != Elem - I)
5385 return false;
5386 }
5387 }
5388 return true;
5389}
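// For illustration (editorial): with BytesPerElement == 4, a 16-byte Bytes
// vector and Bytes[Start..Start+3] == { 20, 21, 22, 23 }, the first defined
// byte gives Base == 20; the wrap check (20 % 16) + 4 <= 16 passes, and the
// remaining bytes are consecutive, so the element is bytes 4..7 of the
// second input. A sequence such as { 20, 22, 21, 23 } fails the
// Base == Elem - I test and is rejected.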
5390
5391// Bytes is a VPERM-like permute vector, except that -1 is used for
5392// undefined bytes. Return true if it can be performed using VSLDB.
5393// When returning true, set StartIndex to the shift amount and OpNo0
5394// and OpNo1 to the VPERM operands that should be used as the first
5395// and second shift operand respectively.
5396static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5397 unsigned &StartIndex, unsigned &OpNo0,
5398 unsigned &OpNo1) {
5399 int OpNos[] = { -1, -1 };
5400 int Shift = -1;
5401 for (unsigned I = 0; I < 16; ++I) {
5402 int Index = Bytes[I];
5403 if (Index >= 0) {
5404 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5405 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5406 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5407 if (Shift < 0)
5408 Shift = ExpectedShift;
5409 else if (Shift != ExpectedShift)
5410 return false;
5411 // Make sure that the operand mappings are consistent with previous
5412 // elements.
5413 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5414 return false;
5415 OpNos[ModelOpNo] = RealOpNo;
5416 }
5417 }
5418 StartIndex = Shift;
5419 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5420}
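// A worked example (editorial): the mask { 1, 2, ..., 15, 16 } selects
// bytes 1..16 of the 32-byte operand concatenation, i.e. a double-vector
// shift left by one byte. Every element yields ExpectedShift == 1; bytes
// 0..14 map model operand 0 to shuffle operand 0 and byte 15 maps model
// operand 1 to shuffle operand 1, so the function returns true with
// StartIndex == 1, OpNo0 == 0 and OpNo1 == 1.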
5421
5422// Create a node that performs P on operands Op0 and Op1, casting the
5423// operands to the appropriate type. The type of the result is determined by P.
5424static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5425 const Permute &P, SDValue Op0, SDValue Op1) {
5426 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5427 // elements of a PACK are twice as wide as the outputs.
5428 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5429 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5430 P.Operand);
5431 // Cast both operands to the appropriate type.
5432 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5433 SystemZ::VectorBytes / InBytes);
5434 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5435 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5436 SDValue Op;
5437 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5438 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5439 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5440 } else if (P.Opcode == SystemZISD::PACK) {
5441 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5442 SystemZ::VectorBytes / P.Operand);
5443 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5444 } else {
5445 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5446 }
5447 return Op;
5448}
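// Editorial example of the type handling: for the VPKH form above (PACK
// with Operand == 1), both inputs are bitcast to v8i16 (InBytes == 2) and
// the result is the v16i8 PACK of the two, i.e. the low byte of every
// halfword. For PERMUTE_DWORDS the inputs are always bitcast to v2i64 and
// P.Operand becomes the VPDI immediate rather than an element size.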
5449
5450static bool isZeroVector(SDValue N) {
5451 if (N->getOpcode() == ISD::BITCAST)
5452 N = N->getOperand(0);
5453 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5454 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5455 return Op->getZExtValue() == 0;
5456 return ISD::isBuildVectorAllZeros(N.getNode());
5457}
5458
5459// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5460static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5461 for (unsigned I = 0; I < Num ; I++)
5462 if (isZeroVector(Ops[I]))
5463 return I;
5464 return UINT32_MAX;
5465}
5466
5467// Bytes is a VPERM-like permute vector, except that -1 is used for
5468// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5469// VSLDB or VPERM.
5470static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5471 SDValue *Ops,
5472 const SmallVectorImpl<int> &Bytes) {
5473 for (unsigned I = 0; I < 2; ++I)
5474 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5475
5476 // First see whether VSLDB can be used.
5477 unsigned StartIndex, OpNo0, OpNo1;
5478 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5479 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5480 Ops[OpNo1],
5481 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5482
5483 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5484 // eliminate a zero vector by reusing any zero index in the permute vector.
5485 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5486 if (ZeroVecIdx != UINT32_MAX) {
5487 bool MaskFirst = true;
5488 int ZeroIdx = -1;
5489 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5490 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5491 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5492 if (OpNo == ZeroVecIdx && I == 0) {
5493 // If the first byte is zero, use mask as first operand.
5494 ZeroIdx = 0;
5495 break;
5496 }
5497 if (OpNo != ZeroVecIdx && Byte == 0) {
5498 // If mask contains a zero, use it by placing that vector first.
5499 ZeroIdx = I + SystemZ::VectorBytes;
5500 MaskFirst = false;
5501 break;
5502 }
5503 }
5504 if (ZeroIdx != -1) {
5505 SDValue IndexNodes[SystemZ::VectorBytes];
5506 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5507 if (Bytes[I] >= 0) {
5508 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5509 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5510 if (OpNo == ZeroVecIdx)
5511 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5512 else {
5513 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5514 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5515 }
5516 } else
5517 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5518 }
5519 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5520 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5521 if (MaskFirst)
5522 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5523 Mask);
5524 else
5525 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5526 Mask);
5527 }
5528 }
5529
5530 SDValue IndexNodes[SystemZ::VectorBytes];
5531 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5532 if (Bytes[I] >= 0)
5533 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5534 else
5535 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5536 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5537 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5538 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5539}
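// Why reusing the mask works (editorial note): VPERM indexes the 32-byte
// concatenation of its two vector operands, and the permute vector itself
// can serve as one of them. If result byte 0 must be zero, the mask is
// passed as the first operand and its own byte 0 is set to 0, so index 0
// selects that very byte and produces 0. Otherwise, a mask byte that
// happens to contain 0 is addressed through the second operand slot.
// Either way, the explicit zero vector operand is eliminated.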
5540
5541namespace {
5542// Describes a general N-operand vector shuffle.
5543struct GeneralShuffle {
5544 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
5545 void addUndef();
5546 bool add(SDValue, unsigned);
5547 SDValue getNode(SelectionDAG &, const SDLoc &);
5548 void tryPrepareForUnpack();
5549 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5550 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5551
5552 // The operands of the shuffle.
5554 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5555 // Index I is -1 if byte I of the result is undefined. Otherwise the
5556 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5557 // Bytes[I] / SystemZ::VectorBytes.
5558 SmallVector<int, SystemZ::VectorBytes> Bytes;
5559
5560 // The type of the shuffle result.
5561 EVT VT;
5562
5563 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5564 unsigned UnpackFromEltSize;
5565};
5566}
5567
5568// Add an extra undefined element to the shuffle.
5569void GeneralShuffle::addUndef() {
5570 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5571 for (unsigned I = 0; I < BytesPerElement; ++I)
5572 Bytes.push_back(-1);
5573}
5574
5575// Add an extra element to the shuffle, taking it from element Elem of Op.
5576// A null Op indicates a vector input whose value will be calculated later;
5577// there is at most one such input per shuffle and it always has the same
5578// type as the result. Aborts and returns false if the source vector elements
5579// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
5580 // LLVM semantics they become implicitly extended, but this case is rare and not optimized.
5581bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5582 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5583
5584 // The source vector can have wider elements than the result,
5585 // either through an explicit TRUNCATE or because of type legalization.
5586 // We want the least significant part.
5587 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5588 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5589
5590 // Return false if the source elements are smaller than their destination
5591 // elements.
5592 if (FromBytesPerElement < BytesPerElement)
5593 return false;
5594
5595 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5596 (FromBytesPerElement - BytesPerElement));
5597
5598 // Look through things like shuffles and bitcasts.
5599 while (Op.getNode()) {
5600 if (Op.getOpcode() == ISD::BITCAST)
5601 Op = Op.getOperand(0);
5602 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5603 // See whether the bytes we need come from a contiguous part of one
5604 // operand.
5605 SmallVector<int, SystemZ::VectorBytes> OpBytes;
5606 if (!getVPermMask(Op, OpBytes))
5607 break;
5608 int NewByte;
5609 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5610 break;
5611 if (NewByte < 0) {
5612 addUndef();
5613 return true;
5614 }
5615 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
5616 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
5617 } else if (Op.isUndef()) {
5618 addUndef();
5619 return true;
5620 } else
5621 break;
5622 }
5623
5624 // Make sure that the source of the extraction is in Ops.
5625 unsigned OpNo = 0;
5626 for (; OpNo < Ops.size(); ++OpNo)
5627 if (Ops[OpNo] == Op)
5628 break;
5629 if (OpNo == Ops.size())
5630 Ops.push_back(Op);
5631
5632 // Add the element to Bytes.
5633 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
5634 for (unsigned I = 0; I < BytesPerElement; ++I)
5635 Bytes.push_back(Base + I);
5636
5637 return true;
5638}
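// Editorial example of the byte arithmetic: taking i32 element 1 of a
// v2i64 operand gives FromBytesPerElement == 8 and BytesPerElement == 4, so
// Byte == (1 * 8) % 16 + (8 - 4) == 12: on this big-endian target the least
// significant four bytes of the second doubleword are bytes 12..15 of the
// operand, and those selectors are appended to Bytes.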
5639
5640// Return SDNodes for the completed shuffle.
5641SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
5642 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
5643
5644 if (Ops.size() == 0)
5645 return DAG.getUNDEF(VT);
5646
5647 // Use a single unpack if possible as the last operation.
5648 tryPrepareForUnpack();
5649
5650 // Make sure that there are at least two shuffle operands.
5651 if (Ops.size() == 1)
5652 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
5653
5654 // Create a tree of shuffles, deferring root node until after the loop.
5655 // Try to redistribute the undefined elements of non-root nodes so that
5656 // the non-root shuffles match something like a pack or merge, then adjust
5657 // the parent node's permute vector to compensate for the new order.
5658 // Among other things, this copes with vectors like <2 x i16> that were
5659 // padded with undefined elements during type legalization.
5660 //
5661 // In the best case this redistribution will lead to the whole tree
5662 // using packs and merges. It should rarely be a loss in other cases.
5663 unsigned Stride = 1;
5664 for (; Stride * 2 < Ops.size(); Stride *= 2) {
5665 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
5666 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
5667
5668 // Create a mask for just these two operands.
5669 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
5670 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5671 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
5672 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
5673 if (OpNo == I)
5674 NewBytes[J] = Byte;
5675 else if (OpNo == I + Stride)
5676 NewBytes[J] = SystemZ::VectorBytes + Byte;
5677 else
5678 NewBytes[J] = -1;
5679 }
5680 // See if it would be better to reorganize NewMask to avoid using VPERM.
5681 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
5682 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
5683 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
5684 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
5685 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5686 if (NewBytes[J] >= 0) {
5687 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
5688 "Invalid double permute");
5689 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
5690 } else
5691 assert(NewBytesMap[J] < 0 && "Invalid double permute");
5692 }
5693 } else {
5694 // Just use NewBytes on the operands.
5695 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
5696 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
5697 if (NewBytes[J] >= 0)
5698 Bytes[J] = I * SystemZ::VectorBytes + J;
5699 }
5700 }
5701 }
5702
5703 // Now we just have 2 inputs. Put the second operand in Ops[1].
5704 if (Stride > 1) {
5705 Ops[1] = Ops[Stride];
5706 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5707 if (Bytes[I] >= int(SystemZ::VectorBytes))
5708 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
5709 }
5710
5711 // Look for an instruction that can do the permute without resorting
5712 // to VPERM.
5713 unsigned OpNo0, OpNo1;
5714 SDValue Op;
5715 if (unpackWasPrepared() && Ops[1].isUndef())
5716 Op = Ops[0];
5717 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
5718 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
5719 else
5720 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
5721
5722 Op = insertUnpackIfPrepared(DAG, DL, Op);
5723
5724 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5725}
5726
5727#ifndef NDEBUG
5728static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
5729 dbgs() << Msg.c_str() << " { ";
5730 for (unsigned i = 0; i < Bytes.size(); i++)
5731 dbgs() << Bytes[i] << " ";
5732 dbgs() << "}\n";
5733}
5734#endif
5735
5736// If the Bytes vector matches an unpack operation, prepare to do the unpack
5737// after all else by removing the zero vector and the effect of the unpack on
5738// Bytes.
5739void GeneralShuffle::tryPrepareForUnpack() {
5740 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
5741 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
5742 return;
5743
5744 // Only do this if removing the zero vector reduces the depth, otherwise
5745 // the critical path will increase with the final unpack.
5746 if (Ops.size() > 2 &&
5747 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
5748 return;
5749
5750 // Find an unpack that would allow removing the zero vector from Ops.
5751 UnpackFromEltSize = 1;
5752 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
5753 bool MatchUnpack = true;
5754 SmallVector<int, SystemZ::VectorBytes> SrcBytes;
5755 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
5756 unsigned ToEltSize = UnpackFromEltSize * 2;
5757 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
5758 if (!IsZextByte)
5759 SrcBytes.push_back(Bytes[Elt]);
5760 if (Bytes[Elt] != -1) {
5761 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
5762 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
5763 MatchUnpack = false;
5764 break;
5765 }
5766 }
5767 }
5768 if (MatchUnpack) {
5769 if (Ops.size() == 2) {
5770 // Don't use unpack if a single source operand needs rearrangement.
5771 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
5772 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
5773 UnpackFromEltSize = UINT_MAX;
5774 return;
5775 }
5776 }
5777 break;
5778 }
5779 }
5780 if (UnpackFromEltSize > 4)
5781 return;
5782
5783 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
5784 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
5785 << ".\n";
5786 dumpBytes(Bytes, "Original Bytes vector:"););
5787
5788 // Apply the unpack in reverse to the Bytes array.
5789 unsigned B = 0;
5790 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
5791 Elt += UnpackFromEltSize;
5792 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
5793 Bytes[B] = Bytes[Elt];
5794 }
5795 while (B < SystemZ::VectorBytes)
5796 Bytes[B++] = -1;
5797
5798 // Remove the zero vector from Ops
5799 Ops.erase(&Ops[ZeroVecOpNo]);
5800 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5801 if (Bytes[I] >= 0) {
5802 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5803 if (OpNo > ZeroVecOpNo)
5804 Bytes[I] -= SystemZ::VectorBytes;
5805 }
5806
5807 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
5808 dbgs() << "\n";);
5809}
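// Editorial example: with UnpackFromEltSize == 1, the eventual UNPACKL_HIGH
// zero-extends bytes 0..7 of its input into halfwords, so even result bytes
// come from the zero vector and odd result bytes from the source. The
// reverse transformation above therefore compacts Bytes[1], Bytes[3], ...,
// Bytes[15] into Bytes[0..7] and pads the remainder with -1.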
5810
5811SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
5812 const SDLoc &DL,
5813 SDValue Op) {
5814 if (!unpackWasPrepared())
5815 return Op;
5816 unsigned InBits = UnpackFromEltSize * 8;
5817 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
5818 SystemZ::VectorBits / InBits);
5819 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
5820 unsigned OutBits = InBits * 2;
5821 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
5822 SystemZ::VectorBits / OutBits);
5823 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
5824}
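// For illustration (editorial): if UnpackFromEltSize == 2 was recorded, Op
// is bitcast to v8i16 and the UNPACKL_HIGH result is v4i32, each element
// the zero-extension of one of the first four halfwords of Op.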
5825
5826// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
5827static bool isScalarToVector(SDValue Op) {
5828 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
5829 if (!Op.getOperand(I).isUndef())
5830 return false;
5831 return true;
5832}
5833
5834// Return a vector of type VT that contains Value in the first element.
5835// The other elements don't matter.
5836static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5837 SDValue Value) {
5838 // If we have a constant, replicate it to all elements and let the
5839 // BUILD_VECTOR lowering take care of it.
5840 if (Value.getOpcode() == ISD::Constant ||
5841 Value.getOpcode() == ISD::ConstantFP) {
5842 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
5843 return DAG.getBuildVector(VT, DL, Ops);
5844 }
5845 if (Value.isUndef())
5846 return DAG.getUNDEF(VT);
5847 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
5848}
5849
5850// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
5851// element 1. Used for cases in which replication is cheap.
5852static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5853 SDValue Op0, SDValue Op1) {
5854 if (Op0.isUndef()) {
5855 if (Op1.isUndef())
5856 return DAG.getUNDEF(VT);
5857 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
5858 }
5859 if (Op1.isUndef())
5860 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
5861 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
5862 buildScalarToVector(DAG, DL, VT, Op0),
5863 buildScalarToVector(DAG, DL, VT, Op1));
5864}
5865
5866// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
5867// vector for them.
5868static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
5869 SDValue Op1) {
5870 if (Op0.isUndef() && Op1.isUndef())
5871 return DAG.getUNDEF(MVT::v2i64);
5872 // If one of the two inputs is undefined then replicate the other one,
5873 // in order to avoid using another register unnecessarily.
5874 if (Op0.isUndef())
5875 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5876 else if (Op1.isUndef())
5877 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5878 else {
5879 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5880 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5881 }
5882 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
5883}
5884
5885// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
5886// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
5887// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
5888// would benefit from this representation and return it if so.
5889static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
5890 BuildVectorSDNode *BVN) {
5891 EVT VT = BVN->getValueType(0);
5892 unsigned NumElements = VT.getVectorNumElements();
5893
5894 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
5895 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
5896 // need a BUILD_VECTOR, add an additional placeholder operand for that
5897 // BUILD_VECTOR and store its operands in ResidueOps.
5898 GeneralShuffle GS(VT);
5899 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
5900 bool FoundOne = false;
5901 for (unsigned I = 0; I < NumElements; ++I) {
5902 SDValue Op = BVN->getOperand(I);
5903 if (Op.getOpcode() == ISD::TRUNCATE)
5904 Op = Op.getOperand(0);
5905 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5906 Op.getOperand(1).getOpcode() == ISD::Constant) {
5907 unsigned Elem = Op.getConstantOperandVal(1);
5908 if (!GS.add(Op.getOperand(0), Elem))
5909 return SDValue();
5910 FoundOne = true;
5911 } else if (Op.isUndef()) {
5912 GS.addUndef();
5913 } else {
5914 if (!GS.add(SDValue(), ResidueOps.size()))
5915 return SDValue();
5916 ResidueOps.push_back(BVN->getOperand(I));
5917 }
5918 }
5919
5920 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
5921 if (!FoundOne)
5922 return SDValue();
5923
5924 // Create the BUILD_VECTOR for the remaining elements, if any.
5925 if (!ResidueOps.empty()) {
5926 while (ResidueOps.size() < NumElements)
5927 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
5928 for (auto &Op : GS.Ops) {
5929 if (!Op.getNode()) {
5930 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
5931 break;
5932 }
5933 }
5934 }
5935 return GS.getNode(DAG, SDLoc(BVN));
5936}
5937
5938bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
5939 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
5940 return true;
5941 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
5942 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
5943 return true;
5944 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
5945 return true;
5946 return false;
5947}
5948
5949// Combine GPR scalar values Elems into a vector of type VT.
5950SDValue
5951SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5952 SmallVectorImpl<SDValue> &Elems) const {
5953 // See whether there is a single replicated value.
5954 SDValue Single;
5955 unsigned int NumElements = Elems.size();
5956 unsigned int Count = 0;
5957 for (auto Elem : Elems) {
5958 if (!Elem.isUndef()) {
5959 if (!Single.getNode())
5960 Single = Elem;
5961 else if (Elem != Single) {
5962 Single = SDValue();
5963 break;
5964 }
5965 Count += 1;
5966 }
5967 }
5968 // There are three cases here:
5969 //
5970 // - if the only defined element is a loaded one, the best sequence
5971 // is a replicating load.
5972 //
5973 // - otherwise, if the only defined element is an i64 value, we will
5974 // end up with the same VLVGP sequence regardless of whether we short-cut
5975 // for replication or fall through to the later code.
5976 //
5977 // - otherwise, if the only defined element is an i32 or smaller value,
5978 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
5979 // This is only a win if the single defined element is used more than once.
5980 // In other cases we're better off using a single VLVGx.
5981 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
5982 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
5983
5984 // If all elements are loads, use VLREP/VLEs (below).
5985 bool AllLoads = true;
5986 for (auto Elem : Elems)
5987 if (!isVectorElementLoad(Elem)) {
5988 AllLoads = false;
5989 break;
5990 }
5991
5992 // The best way of building a v2i64 from two i64s is to use VLVGP.
5993 if (VT == MVT::v2i64 && !AllLoads)
5994 return joinDwords(DAG, DL, Elems[0], Elems[1]);
5995
5996 // Use a 64-bit merge high to combine two doubles.
5997 if (VT == MVT::v2f64 && !AllLoads)
5998 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5999
6000 // Build v4f32 values directly from the FPRs:
6001 //
6002 // <Axxx> <Bxxx> <Cxxx> <Dxxx>
6003 // V V VMRHF
6004 // <ABxx> <CDxx>
6005 // V VMRHG
6006 // <ABCD>
6007 if (VT == MVT::v4f32 && !AllLoads) {
6008 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6009 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
6010 // Avoid unnecessary undefs by reusing the other operand.
6011 if (Op01.isUndef())
6012 Op01 = Op23;
6013 else if (Op23.isUndef())
6014 Op23 = Op01;
6015 // Merging identical replications is a no-op.
6016 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
6017 return Op01;
6018 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
6019 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
6020 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
6021 DL, MVT::v2i64, Op01, Op23);
6022 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6023 }
6024
6025 // Collect the constant terms.
6026 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
6027 SmallVector<bool, 16> Done(NumElements, false);
6028
6029 unsigned NumConstants = 0;
6030 for (unsigned I = 0; I < NumElements; ++I) {
6031 SDValue Elem = Elems[I];
6032 if (Elem.getOpcode() == ISD::Constant ||
6033 Elem.getOpcode() == ISD::ConstantFP) {
6034 NumConstants += 1;
6035 Constants[I] = Elem;
6036 Done[I] = true;
6037 }
6038 }
6039 // If there was at least one constant, fill in the other elements of
6040 // Constants with undefs to get a full vector constant and use that
6041 // as the starting point.
6042 SDValue Result;
6043 SDValue ReplicatedVal;
6044 if (NumConstants > 0) {
6045 for (unsigned I = 0; I < NumElements; ++I)
6046 if (!Constants[I].getNode())
6047 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
6048 Result = DAG.getBuildVector(VT, DL, Constants);
6049 } else {
6050 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
6051 // avoid a false dependency on any previous contents of the vector
6052 // register.
6053
6054 // Use a VLREP if at least one element is a load. Make sure to replicate
6055 // the load with the most elements having its value.
6056 std::map<const SDNode*, unsigned> UseCounts;
6057 SDNode *LoadMaxUses = nullptr;
6058 for (unsigned I = 0; I < NumElements; ++I)
6059 if (isVectorElementLoad(Elems[I])) {
6060 SDNode *Ld = Elems[I].getNode();
6061 UseCounts[Ld]++;
6062 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
6063 LoadMaxUses = Ld;
6064 }
6065 if (LoadMaxUses != nullptr) {
6066 ReplicatedVal = SDValue(LoadMaxUses, 0);
6067 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
6068 } else {
6069 // Try to use VLVGP.
6070 unsigned I1 = NumElements / 2 - 1;
6071 unsigned I2 = NumElements - 1;
6072 bool Def1 = !Elems[I1].isUndef();
6073 bool Def2 = !Elems[I2].isUndef();
6074 if (Def1 || Def2) {
6075 SDValue Elem1 = Elems[Def1 ? I1 : I2];
6076 SDValue Elem2 = Elems[Def2 ? I2 : I1];
6077 Result = DAG.getNode(ISD::BITCAST, DL, VT,
6078 joinDwords(DAG, DL, Elem1, Elem2));
6079 Done[I1] = true;
6080 Done[I2] = true;
6081 } else
6082 Result = DAG.getUNDEF(VT);
6083 }
6084 }
6085
6086 // Use VLVGx to insert the other elements.
6087 for (unsigned I = 0; I < NumElements; ++I)
6088 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
6089 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
6090 DAG.getConstant(I, DL, MVT::i32));
6091 return Result;
6092}
6093
6094SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
6095 SelectionDAG &DAG) const {
6096 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
6097 SDLoc DL(Op);
6098 EVT VT = Op.getValueType();
6099
6100 if (BVN->isConstant()) {
6101 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
6102 return Op;
6103
6104 // Fall back to loading it from memory.
6105 return SDValue();
6106 }
6107
6108 // See if we should use shuffles to construct the vector from other vectors.
6109 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
6110 return Res;
6111
6112 // Detect SCALAR_TO_VECTOR conversions.
6113 if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
6114 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
6115
6116 // Otherwise use buildVector to build the vector up from GPRs.
6117 unsigned NumElements = Op.getNumOperands();
6118 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
6119 for (unsigned I = 0; I < NumElements; ++I)
6120 Ops[I] = Op.getOperand(I);
6121 return buildVector(DAG, DL, VT, Ops);
6122}
6123
6124SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
6125 SelectionDAG &DAG) const {
6126 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
6127 SDLoc DL(Op);
6128 EVT VT = Op.getValueType();
6129 unsigned NumElements = VT.getVectorNumElements();
6130
6131 if (VSN->isSplat()) {
6132 SDValue Op0 = Op.getOperand(0);
6133 unsigned Index = VSN->getSplatIndex();
6134 assert(Index < VT.getVectorNumElements() &&
6135 "Splat index should be defined and in first operand");
6136 // See whether the value we're splatting is directly available as a scalar.
6137 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6138 Op0.getOpcode() == ISD::BUILD_VECTOR)
6139 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
6140 // Otherwise keep it as a vector-to-vector operation.
6141 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
6142 DAG.getTargetConstant(Index, DL, MVT::i32));
6143 }
6144
6145 GeneralShuffle GS(VT);
6146 for (unsigned I = 0; I < NumElements; ++I) {
6147 int Elt = VSN->getMaskElt(I);
6148 if (Elt < 0)
6149 GS.addUndef();
6150 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
6151 unsigned(Elt) % NumElements))
6152 return SDValue();
6153 }
6154 return GS.getNode(DAG, SDLoc(VSN));
6155}
6156
6157SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
6158 SelectionDAG &DAG) const {
6159 SDLoc DL(Op);
6160 // Just insert the scalar into element 0 of an undefined vector.
6161 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
6162 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
6163 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
6164}
6165
6166SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
6167 SelectionDAG &DAG) const {
6168 // Handle insertions of floating-point values.
6169 SDLoc DL(Op);
6170 SDValue Op0 = Op.getOperand(0);
6171 SDValue Op1 = Op.getOperand(1);
6172 SDValue Op2 = Op.getOperand(2);
6173 EVT VT = Op.getValueType();
6174
6175 // Insertions into constant indices of a v2f64 can be done using VPDI.
6176 // However, if the inserted value is a bitcast or a constant then it's
6177 // better to use GPRs, as below.
6178 if (VT == MVT::v2f64 &&
6179 Op1.getOpcode() != ISD::BITCAST &&
6180 Op1.getOpcode() != ISD::ConstantFP &&
6181 Op2.getOpcode() == ISD::Constant) {
6182 uint64_t Index = Op2->getAsZExtVal();
6183 unsigned Mask = VT.getVectorNumElements() - 1;
6184 if (Index <= Mask)
6185 return Op;
6186 }
6187
6188 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
6189 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
6190 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
6191 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
6192 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
6193 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
6194 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6195}
6196
6197SDValue
6198SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
6199 SelectionDAG &DAG) const {
6200 // Handle extractions of floating-point values.
6201 SDLoc DL(Op);
6202 SDValue Op0 = Op.getOperand(0);
6203 SDValue Op1 = Op.getOperand(1);
6204 EVT VT = Op.getValueType();
6205 EVT VecVT = Op0.getValueType();
6206
6207 // Extractions of constant indices can be done directly.
6208 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
6209 uint64_t Index = CIndexN->getZExtValue();
6210 unsigned Mask = VecVT.getVectorNumElements() - 1;
6211 if (Index <= Mask)
6212 return Op;
6213 }
6214
6215 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
6216 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
6217 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
6218 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
6219 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
6220 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6221}
6222
6223SDValue SystemZTargetLowering::
6224lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6225 SDValue PackedOp = Op.getOperand(0);
6226 EVT OutVT = Op.getValueType();
6227 EVT InVT = PackedOp.getValueType();
6228 unsigned ToBits = OutVT.getScalarSizeInBits();
6229 unsigned FromBits = InVT.getScalarSizeInBits();
6230 do {
6231 FromBits *= 2;
6232 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
6233 SystemZ::VectorBits / FromBits);
6234 PackedOp =
6235 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
6236 } while (FromBits != ToBits);
6237 return PackedOp;
6238}
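// Editorial example: extending the first four bytes of a v16i8 input to
// v4i32 takes two UNPACK_HIGH steps, v16i8 -> v8i16 -> v4i32, each doubling
// the element width while keeping the sign-extended high (leftmost)
// elements of the previous stage.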
6239
6240// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
6241SDValue SystemZTargetLowering::
6242lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6243 SDValue PackedOp = Op.getOperand(0);
6244 SDLoc DL(Op);
6245 EVT OutVT = Op.getValueType();
6246 EVT InVT = PackedOp.getValueType();
6247 unsigned InNumElts = InVT.getVectorNumElements();
6248 unsigned OutNumElts = OutVT.getVectorNumElements();
6249 unsigned NumInPerOut = InNumElts / OutNumElts;
6250
6251 SDValue ZeroVec =
6252 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
6253
6254 SmallVector<int, 16> Mask(InNumElts);
6255 unsigned ZeroVecElt = InNumElts;
6256 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
6257 unsigned MaskElt = PackedElt * NumInPerOut;
6258 unsigned End = MaskElt + NumInPerOut - 1;
6259 for (; MaskElt < End; MaskElt++)
6260 Mask[MaskElt] = ZeroVecElt++;
6261 Mask[MaskElt] = PackedElt;
6262 }
6263 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6264 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6265}
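// A worked example (editorial): for a v4i32 -> v2i64 extension the loop
// produces the mask { 4, 0, 5, 1 }; elements 4 and 5 name zero-vector
// lanes, so the shuffle yields { 0, In[0], 0, In[1] }, which bitcasts on
// this big-endian target to the two zero-extended doublewords.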
6266
6267SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6268 unsigned ByScalar) const {
6269 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6270 SDValue Op0 = Op.getOperand(0);
6271 SDValue Op1 = Op.getOperand(1);
6272 SDLoc DL(Op);
6273 EVT VT = Op.getValueType();
6274 unsigned ElemBitSize = VT.getScalarSizeInBits();
6275
6276 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6277 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6278 APInt SplatBits, SplatUndef;
6279 unsigned SplatBitSize;
6280 bool HasAnyUndefs;
6281 // Check for constant splats. Use ElemBitSize as the minimum element
6282 // width and reject splats that need wider elements.
6283 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6284 ElemBitSize, true) &&
6285 SplatBitSize == ElemBitSize) {
6286 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6287 DL, MVT::i32);
6288 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6289 }
6290 // Check for variable splats.
6291 BitVector UndefElements;
6292 SDValue Splat = BVN->getSplatValue(&UndefElements);
6293 if (Splat) {
6294 // Since i32 is the smallest legal type, we either need a no-op
6295 // or a truncation.
6296 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6297 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6298 }
6299 }
6300
6301 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6302 // and the shift amount is directly available in a GPR.
6303 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6304 if (VSN->isSplat()) {
6305 SDValue VSNOp0 = VSN->getOperand(0);
6306 unsigned Index = VSN->getSplatIndex();
6307 assert(Index < VT.getVectorNumElements() &&
6308 "Splat index should be defined and in first operand");
6309 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6310 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6311 // Since i32 is the smallest legal type, we either need a no-op
6312 // or a truncation.
6313 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6314 VSNOp0.getOperand(Index));
6315 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6316 }
6317 }
6318 }
6319
6320 // Otherwise just treat the current form as legal.
6321 return Op;
6322}
6323SDValue SystemZTargetLowering::lowerAddrSpaceCast(SDValue Op,
6324 SelectionDAG &DAG) const {
6325 SDLoc dl(Op);
6326 SDValue Src = Op.getOperand(0);
6327 MVT DstVT = Op.getSimpleValueType();
6328
6329 AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());
6330 unsigned SrcAS = N->getSrcAddressSpace();
6331
6332 assert(SrcAS != N->getDestAddressSpace() &&
6333 "addrspacecast must be between different address spaces");
6334
6335 // addrspacecast [0 <- 1] : Assigning a ptr32 value to a 64-bit pointer.
6336 // addrspacecast [1 <- 0] : Assigning a 64-bit pointer to a ptr32 value.
6337 if (SrcAS == SYSTEMZAS::PTR32 && DstVT == MVT::i64) {
6338 Op = DAG.getNode(ISD::AND, dl, MVT::i32, Src,
6339 DAG.getConstant(0x7fffffff, dl, MVT::i32));
6340 Op = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op);
6341 } else if (DstVT == MVT::i32) {
6342 Op = DAG.getNode(ISD::TRUNCATE, dl, DstVT, Src);
6343 Op = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
6344 DAG.getConstant(0x7fffffff, dl, MVT::i32));
6345 Op = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op);
6346 } else {
6347 report_fatal_error("Bad address space in addrspacecast");
6348 }
6349 return Op;
6350}
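// Editorial note: ptr32 (address space 1) is the 31-bit z/OS address space,
// so both directions clear the high bit with an AND of 0x7fffffff; widening
// then zero-extends to 64 bits, narrowing first truncates. For instance,
// casting the 64-bit pointer value 0xFFFFFFFF89ABCDEF to ptr32 yields
// 0x09ABCDEF.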
6351
6352SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
6353 SelectionDAG &DAG) const {
6354 SDLoc DL(Op);
6355 MVT ResultVT = Op.getSimpleValueType();
6356 SDValue Arg = Op.getOperand(0);
6357 unsigned Check = Op.getConstantOperandVal(1);
6358
6359 unsigned TDCMask = 0;
6360 if (Check & fcSNan)
6361 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
6362 if (Check & fcQNan)
6363 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
6364 if (Check & fcPosInf)
6365 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
6366 if (Check & fcNegInf)
6367 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
6368 if (Check & fcPosNormal)
6369 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
6370 if (Check & fcNegNormal)
6371 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
6372 if (Check & fcPosSubnormal)
6373 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
6374 if (Check & fcNegSubnormal)
6375 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
6376 if (Check & fcPosZero)
6377 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
6378 if (Check & fcNegZero)
6379 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
6380 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
6381
6382 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
6383 return getCCResult(DAG, Intr);
6384}
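// Editorial example: a query for fcNan (fcSNan | fcQNan) sets the four TDC
// mask bits covering plus and minus signaling and quiet NaNs; the TDC node
// then yields a condition code that getCCResult converts into the i1
// result.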
6385
6386SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
6387 SelectionDAG &DAG) const {
6388 SDLoc DL(Op);
6389 SDValue Chain = Op.getOperand(0);
6390
6391 // STCKF only supports a memory operand, so we have to use a temporary.
6392 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
6393 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6394 MachinePointerInfo MPI =
6395 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
6396
6397 // Use STCKF to store the TOD clock into the temporary.
6398 SDValue StoreOps[] = {Chain, StackPtr};
6399 Chain = DAG.getMemIntrinsicNode(
6400 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
6401 MPI, MaybeAlign(), MachineMemOperand::MOStore);
6402
6403 // And read it back from there.
6404 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
6405}
6406
6407SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
6408 SelectionDAG &DAG) const {
6409 switch (Op.getOpcode()) {
6410 case ISD::FRAMEADDR:
6411 return lowerFRAMEADDR(Op, DAG);
6412 case ISD::RETURNADDR:
6413 return lowerRETURNADDR(Op, DAG);
6414 case ISD::BR_CC:
6415 return lowerBR_CC(Op, DAG);
6416 case ISD::SELECT_CC:
6417 return lowerSELECT_CC(Op, DAG);
6418 case ISD::SETCC:
6419 return lowerSETCC(Op, DAG);
6420 case ISD::STRICT_FSETCC:
6421 return lowerSTRICT_FSETCC(Op, DAG, false);
6422 case ISD::STRICT_FSETCCS:
6423 return lowerSTRICT_FSETCC(Op, DAG, true);
6424 case ISD::GlobalAddress:
6425 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
6426 case ISD::GlobalTLSAddress:
6427 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
6428 case ISD::BlockAddress:
6429 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
6430 case ISD::JumpTable:
6431 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
6432 case ISD::ConstantPool:
6433 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
6434 case ISD::BITCAST:
6435 return lowerBITCAST(Op, DAG);
6436 case ISD::VASTART:
6437 return lowerVASTART(Op, DAG);
6438 case ISD::VACOPY:
6439 return lowerVACOPY(Op, DAG);
6440 case ISD::DYNAMIC_STACKALLOC:
6441 return lowerDYNAMIC_STACKALLOC(Op, DAG);
6442 case ISD::GET_DYNAMIC_AREA_OFFSET:
6443 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
6444 case ISD::SMUL_LOHI:
6445 return lowerSMUL_LOHI(Op, DAG);
6446 case ISD::UMUL_LOHI:
6447 return lowerUMUL_LOHI(Op, DAG);
6448 case ISD::SDIVREM:
6449 return lowerSDIVREM(Op, DAG);
6450 case ISD::UDIVREM:
6451 return lowerUDIVREM(Op, DAG);
6452 case ISD::SADDO:
6453 case ISD::SSUBO:
6454 case ISD::UADDO:
6455 case ISD::USUBO:
6456 return lowerXALUO(Op, DAG);
6457 case ISD::UADDO_CARRY:
6458 case ISD::USUBO_CARRY:
6459 return lowerUADDSUBO_CARRY(Op, DAG);
6460 case ISD::OR:
6461 return lowerOR(Op, DAG);
6462 case ISD::CTPOP:
6463 return lowerCTPOP(Op, DAG);
6464 case ISD::VECREDUCE_ADD:
6465 return lowerVECREDUCE_ADD(Op, DAG);
6466 case ISD::ATOMIC_FENCE:
6467 return lowerATOMIC_FENCE(Op, DAG);
6468 case ISD::ATOMIC_SWAP:
6469 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
6470 case ISD::ATOMIC_STORE:
6471 case ISD::ATOMIC_LOAD:
6472 return lowerATOMIC_LDST_I128(Op, DAG);
6473 case ISD::ATOMIC_LOAD_ADD:
6474 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
6475 case ISD::ATOMIC_LOAD_SUB:
6476 return lowerATOMIC_LOAD_SUB(Op, DAG);
6477 case ISD::ATOMIC_LOAD_AND:
6478 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
6479 case ISD::ATOMIC_LOAD_OR:
6480 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
6481 case ISD::ATOMIC_LOAD_XOR:
6482 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
6483 case ISD::ATOMIC_LOAD_NAND:
6484 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
6485 case ISD::ATOMIC_LOAD_MIN:
6486 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
6487 case ISD::ATOMIC_LOAD_MAX:
6488 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
6489 case ISD::ATOMIC_LOAD_UMIN:
6490 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
6491 case ISD::ATOMIC_LOAD_UMAX:
6492 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
6493 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
6494 return lowerATOMIC_CMP_SWAP(Op, DAG);
6495 case ISD::STACKSAVE:
6496 return lowerSTACKSAVE(Op, DAG);
6497 case ISD::STACKRESTORE:
6498 return lowerSTACKRESTORE(Op, DAG);
6499 case ISD::PREFETCH:
6500 return lowerPREFETCH(Op, DAG);
6501 case ISD::INTRINSIC_W_CHAIN:
6502 return lowerINTRINSIC_W_CHAIN(Op, DAG);
6503 case ISD::INTRINSIC_WO_CHAIN:
6504 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
6505 case ISD::BUILD_VECTOR:
6506 return lowerBUILD_VECTOR(Op, DAG);
6507 case ISD::VECTOR_SHUFFLE:
6508 return lowerVECTOR_SHUFFLE(Op, DAG);
6509 case ISD::SCALAR_TO_VECTOR:
6510 return lowerSCALAR_TO_VECTOR(Op, DAG);
6511 case ISD::INSERT_VECTOR_ELT:
6512 return lowerINSERT_VECTOR_ELT(Op, DAG);
6513 case ISD::EXTRACT_VECTOR_ELT:
6514 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6515 case ISD::SIGN_EXTEND_VECTOR_INREG:
6516 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
6517 case ISD::ZERO_EXTEND_VECTOR_INREG:
6518 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
6519 case ISD::SHL:
6520 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
6521 case ISD::SRL:
6522 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
6523 case ISD::SRA:
6524 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
6525 case ISD::ADDRSPACECAST:
6526 return lowerAddrSpaceCast(Op, DAG);
6527 case ISD::ROTL:
6528 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
6529 case ISD::IS_FPCLASS:
6530 return lowerIS_FPCLASS(Op, DAG);
6531 case ISD::GET_ROUNDING:
6532 return lowerGET_ROUNDING(Op, DAG);
6533 case ISD::READCYCLECOUNTER:
6534 return lowerREADCYCLECOUNTER(Op, DAG);
6535 case ISD::SADDO_CARRY:
6536 case ISD::SSUBO_CARRY:
6537 // These operations are legal on our platform, but we cannot actually
6538 // set the operation action to Legal as common code would treat this
6539 // as equivalent to Expand. Instead, we keep the operation action to
6540 // Custom and just leave them unchanged here.
6541 return Op;
6542
6543 default:
6544 llvm_unreachable("Unexpected node to lower");
6545 }
6546}
6547
6548static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
6549 const SDLoc &SL) {
6550 // If i128 is legal, just use a normal bitcast.
6551 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
6552 return DAG.getBitcast(MVT::f128, Src);
6553
6554 // Otherwise, f128 must live in FP128, so do a partwise move.
6555 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
6556 &SystemZ::FP128BitRegClass);
6557
6558 SDValue Hi, Lo;
6559 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
6560
6561 Hi = DAG.getBitcast(MVT::f64, Hi);
6562 Lo = DAG.getBitcast(MVT::f64, Lo);
6563
6564 SDNode *Pair = DAG.getMachineNode(
6565 SystemZ::REG_SEQUENCE, SL, MVT::f128,
6566 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
6567 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
6568 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
6569 return SDValue(Pair, 0);
6570}
6571
6572static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
6573 const SDLoc &SL) {
6574 // If i128 is legal, just use a normal bitcast.
6575 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
6576 return DAG.getBitcast(MVT::i128, Src);
6577
6578 // Otherwise, f128 must live in FP128, so do a partwise move.
6579 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
6580 &SystemZ::FP128BitRegClass);
6581
6582 SDValue LoFP =
6583 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
6584 SDValue HiFP =
6585 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
6586 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
6587 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
6588
6589 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
6590}
6591
6592// Lower operations with invalid operand or result types (currently used
6593// only for 128-bit integer types).
6594void
6595SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
6596 SmallVectorImpl<SDValue> &Results,
6597 SelectionDAG &DAG) const {
6598 switch (N->getOpcode()) {
6599 case ISD::ATOMIC_LOAD: {
6600 SDLoc DL(N);
6601 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
6602 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
6603 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6604 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
6605 DL, Tys, Ops, MVT::i128, MMO);
6606
6607 SDValue Lowered = lowerGR128ToI128(DAG, Res);
6608 if (N->getValueType(0) == MVT::f128)
6609 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
6610 Results.push_back(Lowered);
6611 Results.push_back(Res.getValue(1));
6612 break;
6613 }
6614 case ISD::ATOMIC_STORE: {
6615 SDLoc DL(N);
6616 SDVTList Tys = DAG.getVTList(MVT::Other);
6617 SDValue Val = N->getOperand(1);
6618 if (Val.getValueType() == MVT::f128)
6619 Val = expandBitCastF128ToI128(DAG, Val, DL);
6620 Val = lowerI128ToGR128(DAG, Val);
6621
6622 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
6623 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6624 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
6625 DL, Tys, Ops, MVT::i128, MMO);
6626 // We have to enforce sequential consistency by performing a
6627 // serialization operation after the store.
6628 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
6629 AtomicOrdering::SequentiallyConsistent)
6630 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
6631 MVT::Other, Res), 0);
6632 Results.push_back(Res);
6633 break;
6634 }
6635 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
6636 SDLoc DL(N);
6637 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
6638 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
6639 lowerI128ToGR128(DAG, N->getOperand(2)),
6640 lowerI128ToGR128(DAG, N->getOperand(3)) };
6641 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6642 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
6643 DL, Tys, Ops, MVT::i128, MMO);
6644 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
6645 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
6646 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
6647 Results.push_back(lowerGR128ToI128(DAG, Res));
6648 Results.push_back(Success);
6649 Results.push_back(Res.getValue(2));
6650 break;
6651 }
6652 case ISD::BITCAST: {
6653 SDValue Src = N->getOperand(0);
6654 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
6655 !useSoftFloat()) {
6656 SDLoc DL(N);
6657 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
6658 }
6659 break;
6660 }
6661 default:
6662 llvm_unreachable("Unexpected node to lower");
6663 }
6664}
6665
6666void
6667SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
6668 SmallVectorImpl<SDValue> &Results,
6669 SelectionDAG &DAG) const {
6670 return LowerOperationWrapper(N, Results, DAG);
6671}
6672
6673const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
6674#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
6675 switch ((SystemZISD::NodeType)Opcode) {
6676 case SystemZISD::FIRST_NUMBER: break;
6677 OPCODE(RET_GLUE);
6678 OPCODE(CALL);
6679 OPCODE(SIBCALL);
6680 OPCODE(TLS_GDCALL);
6681 OPCODE(TLS_LDCALL);
6682 OPCODE(PCREL_WRAPPER);
6683 OPCODE(PCREL_OFFSET);
6684 OPCODE(ICMP);
6685 OPCODE(FCMP);
6686 OPCODE(STRICT_FCMP);
6687 OPCODE(STRICT_FCMPS);
6688 OPCODE(TM);
6689 OPCODE(BR_CCMASK);
6690 OPCODE(SELECT_CCMASK);
6691 OPCODE(ADJDYNALLOC);
6692 OPCODE(PROBED_ALLOCA);
6693 OPCODE(POPCNT);
6694 OPCODE(SMUL_LOHI);
6695 OPCODE(UMUL_LOHI);
6696 OPCODE(SDIVREM);
6697 OPCODE(UDIVREM);
6698 OPCODE(SADDO);
6699 OPCODE(SSUBO);
6700 OPCODE(UADDO);
6701 OPCODE(USUBO);
6702 OPCODE(ADDCARRY);
6703 OPCODE(SUBCARRY);
6704 OPCODE(GET_CCMASK);
6705 OPCODE(MVC);
6706 OPCODE(NC);
6707 OPCODE(OC);
6708 OPCODE(XC);
6709 OPCODE(CLC);
6710 OPCODE(MEMSET_MVC);
6711 OPCODE(STPCPY);
6712 OPCODE(STRCMP);
6713 OPCODE(SEARCH_STRING);
6714 OPCODE(IPM);
6715 OPCODE(TBEGIN);
6716 OPCODE(TBEGIN_NOFLOAT);
6717 OPCODE(TEND);
6718 OPCODE(BYTE_MASK);
6719 OPCODE(ROTATE_MASK);
6720 OPCODE(REPLICATE);
6721 OPCODE(JOIN_DWORDS);
6722 OPCODE(SPLAT);
6723 OPCODE(MERGE_HIGH);
6724 OPCODE(MERGE_LOW);
6725 OPCODE(SHL_DOUBLE);
6726 OPCODE(PERMUTE_DWORDS);
6727 OPCODE(PERMUTE);
6728 OPCODE(PACK);
6729 OPCODE(PACKS_CC);
6730 OPCODE(PACKLS_CC);
6731 OPCODE(UNPACK_HIGH);
6732 OPCODE(UNPACKL_HIGH);
6733 OPCODE(UNPACK_LOW);
6734 OPCODE(UNPACKL_LOW);
6735 OPCODE(VSHL_BY_SCALAR);
6736 OPCODE(VSRL_BY_SCALAR);
6737 OPCODE(VSRA_BY_SCALAR);
6738 OPCODE(VROTL_BY_SCALAR);
6739 OPCODE(VSUM);
6740 OPCODE(VACC);
6741 OPCODE(VSCBI);
6742 OPCODE(VAC);
6743 OPCODE(VSBI);
6744 OPCODE(VACCC);
6745 OPCODE(VSBCBI);
6746 OPCODE(VICMPE);
6747 OPCODE(VICMPH);
6748 OPCODE(VICMPHL);
6749 OPCODE(VICMPES);
6750 OPCODE(VICMPHS);
6751 OPCODE(VICMPHLS);
6752 OPCODE(VFCMPE);
6753 OPCODE(STRICT_VFCMPE);
6754 OPCODE(STRICT_VFCMPES);
6755 OPCODE(VFCMPH);
6756 OPCODE(STRICT_VFCMPH);
6757 OPCODE(STRICT_VFCMPHS);
6758 OPCODE(VFCMPHE);
6759 OPCODE(STRICT_VFCMPHE);
6760 OPCODE(STRICT_VFCMPHES);
6761 OPCODE(VFCMPES);
6762 OPCODE(VFCMPHS);
6763 OPCODE(VFCMPHES);
6764 OPCODE(VFTCI);
6765 OPCODE(VEXTEND);
6766 OPCODE(STRICT_VEXTEND);
6767 OPCODE(VROUND);
6768 OPCODE(STRICT_VROUND);
6769 OPCODE(VTM);
6770 OPCODE(SCMP128HI);
6771 OPCODE(UCMP128HI);
6772 OPCODE(VFAE_CC);
6773 OPCODE(VFAEZ_CC);
6774 OPCODE(VFEE_CC);
6775 OPCODE(VFEEZ_CC);
6776 OPCODE(VFENE_CC);
6777 OPCODE(VFENEZ_CC);
6778 OPCODE(VISTR_CC);
6779 OPCODE(VSTRC_CC);
6780 OPCODE(VSTRCZ_CC);
6781 OPCODE(VSTRS_CC);
6782 OPCODE(VSTRSZ_CC);
6783 OPCODE(TDC);
6784 OPCODE(ATOMIC_SWAPW);
6785 OPCODE(ATOMIC_LOADW_ADD);
6786 OPCODE(ATOMIC_LOADW_SUB);
6787 OPCODE(ATOMIC_LOADW_AND);
6788 OPCODE(ATOMIC_LOADW_OR);
6789 OPCODE(ATOMIC_LOADW_XOR);
6790 OPCODE(ATOMIC_LOADW_NAND);
6791 OPCODE(ATOMIC_LOADW_MIN);
6792 OPCODE(ATOMIC_LOADW_MAX);
6793 OPCODE(ATOMIC_LOADW_UMIN);
6794 OPCODE(ATOMIC_LOADW_UMAX);
6795 OPCODE(ATOMIC_CMP_SWAPW);
6796 OPCODE(ATOMIC_CMP_SWAP);
6797 OPCODE(ATOMIC_LOAD_128);
6798 OPCODE(ATOMIC_STORE_128);
6799 OPCODE(ATOMIC_CMP_SWAP_128);
6800 OPCODE(LRV);
6801 OPCODE(STRV);
6802 OPCODE(VLER);
6803 OPCODE(VSTER);
6804 OPCODE(STCKF);
6805 OPCODE(PREFETCH);
6806 OPCODE(ADA_ENTRY);
6807 }
6808 return nullptr;
6809#undef OPCODE
6810}
6811
6812// Return true if VT is a vector whose elements are a whole number of bytes
6813// in width. Also check for presence of vector support.
6814bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
6815 if (!Subtarget.hasVector())
6816 return false;
6817
6818 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
6819}
6820
6821// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
6822// producing a result of type ResVT. Op is a possibly bitcast version
6823// of the input vector and Index is the index (based on type VecVT) that
6824// should be extracted. Return the new extraction if a simplification
6825// was possible or if Force is true.
6826SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
6827 EVT VecVT, SDValue Op,
6828 unsigned Index,
6829 DAGCombinerInfo &DCI,
6830 bool Force) const {
6831 SelectionDAG &DAG = DCI.DAG;
6832
6833 // The number of bytes being extracted.
6834 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6835
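// Peel layers off Op one at a time (bitcasts, byte-level shuffles,
// BUILD_VECTORs and *_EXTEND_VECTOR_INREG nodes), retargeting Index at
// each step until no further simplification applies.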
6836 for (;;) {
6837 unsigned Opcode = Op.getOpcode();
6838 if (Opcode == ISD::BITCAST)
6839 // Look through bitcasts.
6840 Op = Op.getOperand(0);
6841 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
6842 canTreatAsByteVector(Op.getValueType())) {
6843 // Get a VPERM-like permute mask and see whether the bytes covered
6844 // by the extracted element are a contiguous sequence from one
6845 // source operand.
6846 SmallVector<int, SystemZ::VectorBytes> Bytes;
6847 if (!getVPermMask(Op, Bytes))
6848 break;
6849 int First;
6850 if (!getShuffleInput(Bytes, Index * BytesPerElement,
6851 BytesPerElement, First))
6852 break;
6853 if (First < 0)
6854 return DAG.getUNDEF(ResVT);
6855 // Make sure the contiguous sequence starts at a multiple of the
6856 // original element size.
6857 unsigned Byte = unsigned(First) % Bytes.size();
6858 if (Byte % BytesPerElement != 0)
6859 break;
6860 // We can get the extracted value directly from an input.
6861 Index = Byte / BytesPerElement;
6862 Op = Op.getOperand(unsigned(First) / Bytes.size());
6863 Force = true;
6864 } else if (Opcode == ISD::BUILD_VECTOR &&
6865 canTreatAsByteVector(Op.getValueType())) {
6866 // We can only optimize this case if the BUILD_VECTOR elements are
6867 // at least as wide as the extracted value.
6868 EVT OpVT = Op.getValueType();
6869 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6870 if (OpBytesPerElement < BytesPerElement)
6871 break;
6872 // Make sure that the least-significant bit of the extracted value
6873 // is the least significant bit of an input.
6874 unsigned End = (Index + 1) * BytesPerElement;
6875 if (End % OpBytesPerElement != 0)
6876 break;
6877 // We're extracting the low part of one operand of the BUILD_VECTOR.
6878 Op = Op.getOperand(End / OpBytesPerElement - 1);
6879 if (!Op.getValueType().isInteger()) {
6880 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
6881 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
6882 DCI.AddToWorklist(Op.getNode());
6883 }
6884 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
6885 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
6886 if (VT != ResVT) {
6887 DCI.AddToWorklist(Op.getNode());
6888 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
6889 }
6890 return Op;
6891 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6892 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
6893 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6894 canTreatAsByteVector(Op.getValueType()) &&
6895 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
6896 // Make sure that only the unextended bits are significant.
6897 EVT ExtVT = Op.getValueType();
6898 EVT OpVT = Op.getOperand(0).getValueType();
6899 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
6900 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6901 unsigned Byte = Index * BytesPerElement;
6902 unsigned SubByte = Byte % ExtBytesPerElement;
6903 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
6904 if (SubByte < MinSubByte ||
6905 SubByte + BytesPerElement > ExtBytesPerElement)
6906 break;
6907 // Get the byte offset of the unextended element
6908 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
6909 // ...then add the byte offset relative to that element.
6910 Byte += SubByte - MinSubByte;
6911 if (Byte % BytesPerElement != 0)
6912 break;
6913 Op = Op.getOperand(0);
6914 Index = Byte / BytesPerElement;
6915 Force = true;
6916 } else
6917 break;
6918 }
6919 if (Force) {
6920 if (Op.getValueType() != VecVT) {
6921 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
6922 DCI.AddToWorklist(Op.getNode());
6923 }
6924 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
6925 DAG.getConstant(Index, DL, MVT::i32));
6926 }
6927 return SDValue();
6928}
6929
6930// Optimize vector operations in scalar value Op on the basis that Op
6931// is truncated to TruncVT.
6932SDValue SystemZTargetLowering::combineTruncateExtract(
6933 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
6934 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
6935 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
6936 // of type TruncVT.
6937 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6938 TruncVT.getSizeInBits() % 8 == 0) {
6939 SDValue Vec = Op.getOperand(0);
6940 EVT VecVT = Vec.getValueType();
6941 if (canTreatAsByteVector(VecVT)) {
6942 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
6943 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6944 unsigned TruncBytes = TruncVT.getStoreSize();
6945 if (BytesPerElement % TruncBytes == 0) {
6946 // Calculate the value of Y' in the above description. We are
6947 // splitting the original elements into Scale equal-sized pieces
6948 // and for truncation purposes want the last (least-significant)
6949 // of these pieces for IndexN. This is easiest to do by calculating
6950 // the start index of the following element and then subtracting 1.
6951 unsigned Scale = BytesPerElement / TruncBytes;
6952 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
6953
6954 // Defer the creation of the bitcast from X to combineExtract,
6955 // which might be able to optimize the extraction.
6956 VecVT = EVT::getVectorVT(*DCI.DAG.getContext(),
6957 MVT::getIntegerVT(TruncBytes * 8),
6958 VecVT.getStoreSize() / TruncBytes);
6959 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
6960 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
6961 }
6962 }
6963 }
6964 }
6965 return SDValue();
6966}
6967
6968SDValue SystemZTargetLowering::combineZERO_EXTEND(
6969 SDNode *N, DAGCombinerInfo &DCI) const {
6970 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
6971 SelectionDAG &DAG = DCI.DAG;
6972 SDValue N0 = N->getOperand(0);
6973 EVT VT = N->getValueType(0);
6974 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
6975 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
6976 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6977 if (TrueOp && FalseOp) {
6978 SDLoc DL(N0);
6979 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
6980 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
6981 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
6982 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
6983 // If N0 has multiple uses, change other uses as well.
6984 if (!N0.hasOneUse()) {
6985 SDValue TruncSelect =
6986 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
6987 DCI.CombineTo(N0.getNode(), TruncSelect);
6988 }
6989 return NewSelect;
6990 }
6991 }
6992 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
6993 // of the result is smaller than the size of X and all the truncated bits
6994 // of X are already zero.
6995 if (N0.getOpcode() == ISD::XOR &&
6996 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
6997 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6998 N0.getOperand(1).getOpcode() == ISD::Constant) {
6999 SDValue X = N0.getOperand(0).getOperand(0);
7000 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
7001 KnownBits Known = DAG.computeKnownBits(X);
7002 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
7003 N0.getValueSizeInBits(),
7004 VT.getSizeInBits());
7005 if (TruncatedBits.isSubsetOf(Known.Zero)) {
7006 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7007 APInt Mask = N0.getConstantOperandAPInt(1).trunc(VT.getSizeInBits());
7008 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
7009 X, DAG.getConstant(Mask, SDLoc(N0), VT));
7010 }
7011 }
7012 }
7013
7014 return SDValue();
7015}
7016
7017SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
7018 SDNode *N, DAGCombinerInfo &DCI) const {
7019 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
7020 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
7021 // into (select_cc LHS, RHS, -1, 0, COND)
7022 SelectionDAG &DAG = DCI.DAG;
7023 SDValue N0 = N->getOperand(0);
7024 EVT VT = N->getValueType(0);
7025 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7026 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
7027 N0 = N0.getOperand(0);
7028 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
7029 SDLoc DL(N0);
7030 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
7031 DAG.getAllOnesConstant(DL, VT),
7032 DAG.getConstant(0, DL, VT), N0.getOperand(2) };
7033 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
7034 }
7035 return SDValue();
7036}
7037
7038SDValue SystemZTargetLowering::combineSIGN_EXTEND(
7039 SDNode *N, DAGCombinerInfo &DCI) const {
7040 // Convert (sext (ashr (shl X, C1), C2)) to
7041 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
7042 // cheap as narrower ones.
7043 SelectionDAG &DAG = DCI.DAG;
7044 SDValue N0 = N->getOperand(0);
7045 EVT VT = N->getValueType(0);
7046 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
7047 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7048 SDValue Inner = N0.getOperand(0);
7049 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
7050 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
7051 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
7052 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
7053 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
7054 EVT ShiftVT = N0.getOperand(1).getValueType();
7055 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
7056 Inner.getOperand(0));
7057 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
7058 DAG.getConstant(NewShlAmt, SDLoc(Inner),
7059 ShiftVT));
7060 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
7061 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
7062 }
7063 }
7064 }
7065
7066 return SDValue();
7067}
7068
7069SDValue SystemZTargetLowering::combineMERGE(
7070 SDNode *N, DAGCombinerInfo &DCI) const {
7071 SelectionDAG &DAG = DCI.DAG;
7072 unsigned Opcode = N->getOpcode();
7073 SDValue Op0 = N->getOperand(0);
7074 SDValue Op1 = N->getOperand(1);
7075 if (Op0.getOpcode() == ISD::BITCAST)
7076 Op0 = Op0.getOperand(0);
7077 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
7078 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
7079 // for v4f32.
7080 if (Op1 == N->getOperand(0))
7081 return Op1;
7082 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
7083 EVT VT = Op1.getValueType();
7084 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
7085 if (ElemBytes <= 4) {
7086 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
7087 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
7088 EVT InVT = VT.changeVectorElementTypeToInteger();
7089 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
7090 SystemZ::VectorBytes / ElemBytes / 2);
7091 if (VT != InVT) {
7092 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
7093 DCI.AddToWorklist(Op1.getNode());
7094 }
7095 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
7096 DCI.AddToWorklist(Op.getNode());
7097 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
7098 }
7099 }
7100 return SDValue();
7101}
7102
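// Return true if the i128 loaded by LD is used only to extract its low
// and/or high 64-bit halves (a TRUNCATE to i64, optionally preceded by an
// SRL by 64). On success, LoPart and HiPart are set to the extraction nodes.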
7103static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7104 SDNode *&HiPart) {
7105 LoPart = HiPart = nullptr;
7106
7107 // Scan through all users.
7108 for (SDUse &Use : LD->uses()) {
7109 // Skip the uses of the chain.
7110 if (Use.getResNo() != 0)
7111 continue;
7112
7113 // Verify every user is a TRUNCATE to i64 of the low or high half.
7114 SDNode *User = Use.getUser();
7115 bool IsLoPart = true;
7116 if (User->getOpcode() == ISD::SRL &&
7117 User->getOperand(1).getOpcode() == ISD::Constant &&
7118 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
7119 User = *User->user_begin();
7120 IsLoPart = false;
7121 }
7122 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
7123 return false;
7124
7125 if (IsLoPart) {
7126 if (LoPart)
7127 return false;
7128 LoPart = User;
7129 } else {
7130 if (HiPart)
7131 return false;
7132 HiPart = User;
7133 }
7134 }
7135 return true;
7136}
7137
7138static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7139 SDNode *&HiPart) {
7140 LoPart = HiPart = nullptr;
7141
7142 // Scan through all users.
7143 for (SDUse &Use : LD->uses()) {
7144 // Skip the uses of the chain.
7145 if (Use.getResNo() != 0)
7146 continue;
7147
7148 // Verify every user is an EXTRACT_SUBREG of the low or high half.
7149 SDNode *User = Use.getUser();
7150 if (!User->hasOneUse() || !User->isMachineOpcode() ||
7151 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
7152 return false;
7153
7154 switch (User->getConstantOperandVal(1)) {
7155 case SystemZ::subreg_l64:
7156 if (LoPart)
7157 return false;
7158 LoPart = User;
7159 break;
7160 case SystemZ::subreg_h64:
7161 if (HiPart)
7162 return false;
7163 HiPart = User;
7164 break;
7165 default:
7166 return false;
7167 }
7168 }
7169 return true;
7170}
7171
7172SDValue SystemZTargetLowering::combineLOAD(
7173 SDNode *N, DAGCombinerInfo &DCI) const {
7174 SelectionDAG &DAG = DCI.DAG;
7175 EVT LdVT = N->getValueType(0);
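// Loads from the 32-bit __ptr32 address space carry a base pointer whose
// type differs from the native pointer type; rewrite such loads with an
// address-space cast so they use a full-width pointer.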
7176 if (auto *LN = dyn_cast<LoadSDNode>(N)) {
7177 if (LN->getAddressSpace() == SYSTEMZAS::PTR32) {
7178 MVT PtrVT = getPointerTy(DAG.getDataLayout());
7179 MVT LoadNodeVT = LN->getBasePtr().getSimpleValueType();
7180 if (PtrVT != LoadNodeVT) {
7181 SDLoc DL(LN);
7182 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(
7183 DL, PtrVT, LN->getBasePtr(), SYSTEMZAS::PTR32, 0);
7184 return DAG.getExtLoad(LN->getExtensionType(), DL, LN->getValueType(0),
7185 LN->getChain(), AddrSpaceCast, LN->getMemoryVT(),
7186 LN->getMemOperand());
7187 }
7188 }
7189 }
7190 SDLoc DL(N);
7191
7192 // Replace a 128-bit load that is used solely to move its value into GPRs
7193 // by separate loads of both halves.
7194 LoadSDNode *LD = cast<LoadSDNode>(N);
7195 if (LD->isSimple() && ISD::isNormalLoad(LD)) {
7196 SDNode *LoPart, *HiPart;
7197 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
7198 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
7199 // Rewrite each extraction as an independent load.
7200 SmallVector<SDValue, 2> ArgChains;
7201 if (HiPart) {
7202 SDValue EltLoad = DAG.getLoad(
7203 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
7204 LD->getPointerInfo(), LD->getOriginalAlign(),
7205 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7206
7207 DCI.CombineTo(HiPart, EltLoad, true);
7208 ArgChains.push_back(EltLoad.getValue(1));
7209 }
7210 if (LoPart) {
7211 SDValue EltLoad = DAG.getLoad(
7212 LoPart->getValueType(0), DL, LD->getChain(),
7213 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
7214 LD->getPointerInfo().getWithOffset(8), LD->getOriginalAlign(),
7215 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7216
7217 DCI.CombineTo(LoPart, EltLoad, true);
7218 ArgChains.push_back(EltLoad.getValue(1));
7219 }
7220
7221 // Collect all chains via TokenFactor.
7222 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
7223 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
7224 DCI.AddToWorklist(Chain.getNode());
7225 return SDValue(N, 0);
7226 }
7227 }
7228
7229 if (LdVT.isVector() || LdVT.isInteger())
7230 return SDValue();
7231 // Transform a scalar load that is REPLICATEd as well as having other
7232 // use(s) to the form where the other use(s) use the first element of the
7233 // REPLICATE instead of the load. Otherwise instruction selection will not
7234 // produce a VLREP. Avoid extracting to a GPR, so only do this for
7235 // floating-point loads.
7236
7237 SDValue Replicate;
7238 SmallVector<SDNode*, 8> OtherUses;
7239 for (SDUse &Use : N->uses()) {
7240 if (Use.getUser()->getOpcode() == SystemZISD::REPLICATE) {
7241 if (Replicate)
7242 return SDValue(); // Should never happen
7243 Replicate = SDValue(Use.getUser(), 0);
7244 } else if (Use.getResNo() == 0)
7245 OtherUses.push_back(Use.getUser());
7246 }
7247 if (!Replicate || OtherUses.empty())
7248 return SDValue();
7249
7250 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
7251 Replicate, DAG.getConstant(0, DL, MVT::i32));
7252 // Update uses of the loaded Value while preserving old chains.
7253 for (SDNode *U : OtherUses) {
7254 SmallVector<SDValue, 8> Ops;
7255 for (SDValue Op : U->ops())
7256 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
7257 DAG.UpdateNodeOperands(U, Ops);
7258 }
7259 return SDValue(N, 0);
7260}
7261
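// Return true if a load or store of type VT can be implemented by a
// byte-swapping memory instruction (LRVH/LRV/LRVG and STRVH/STRV/STRVG, or
// the VLBR/VSTBR family when vector-enhancements-2 is available).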
7262bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
7263 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
7264 return true;
7265 if (Subtarget.hasVectorEnhancements2())
7266 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
7267 return true;
7268 return false;
7269}
7270
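// Return true if shuffle mask M reverses the elements of a 128-bit vector,
// i.e. element i is taken from position NumElts - 1 - i (UNDEF entries are
// ignored). Such a shuffle can be done by an element-swapping VLER/VSTER.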
7271 static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
7272 if (!VT.isVector() || !VT.isSimple() ||
7273 VT.getSizeInBits() != 128 ||
7274 VT.getScalarSizeInBits() % 8 != 0)
7275 return false;
7276
7277 unsigned NumElts = VT.getVectorNumElements();
7278 for (unsigned i = 0; i < NumElts; ++i) {
7279 if (M[i] < 0) continue; // ignore UNDEF indices
7280 if ((unsigned) M[i] != NumElts - 1 - i)
7281 return false;
7282 }
7283
7284 return true;
7285}
7286
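// Return true if StoredVal is consumed only by stores of a round type of at
// most 16 bytes, possibly via a splat BUILD_VECTOR that itself feeds only
// such stores, so the value may safely be replaced by a replicated vector.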
7287static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
7288 for (auto *U : StoredVal->users()) {
7289 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
7290 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
7291 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
7292 continue;
7293 } else if (isa<BuildVectorSDNode>(U)) {
7294 SDValue BuildVector = SDValue(U, 0);
7295 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
7296 isOnlyUsedByStores(BuildVector, DAG))
7297 continue;
7298 }
7299 return false;
7300 }
7301 return true;
7302}
7303
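// Return true if Val is an i128 assembled from two i64 parts in the form
// (or (zext Lo), (shl (anyext Hi), 64)), setting LoPart and HiPart.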
7304static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
7305 SDValue &HiPart) {
7306 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
7307 return false;
7308
7309 SDValue Op0 = Val.getOperand(0);
7310 SDValue Op1 = Val.getOperand(1);
7311
7312 if (Op0.getOpcode() == ISD::SHL)
7313 std::swap(Op0, Op1);
7314 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
7315 Op1.getOperand(1).getOpcode() != ISD::Constant ||
7316 Op1.getConstantOperandVal(1) != 64)
7317 return false;
7318 Op1 = Op1.getOperand(0);
7319
7320 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
7321 Op0.getOperand(0).getValueType() != MVT::i64)
7322 return false;
7323 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
7324 Op1.getOperand(0).getValueType() != MVT::i64)
7325 return false;
7326
7327 LoPart = Op0.getOperand(0);
7328 HiPart = Op1.getOperand(0);
7329 return true;
7330}
7331
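// Return true if Val is an f128 built by a REG_SEQUENCE combining two f64
// values into the low and high halves, setting LoPart and HiPart.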
7332static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
7333 SDValue &HiPart) {
7334 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
7335 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
7336 return false;
7337
7338 if (Val->getNumOperands() != 5 ||
7339 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
7340 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
7341 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
7342 return false;
7343
7344 LoPart = Val->getOperand(1);
7345 HiPart = Val->getOperand(3);
7346 return true;
7347}
7348
7349SDValue SystemZTargetLowering::combineSTORE(
7350 SDNode *N, DAGCombinerInfo &DCI) const {
7351 SelectionDAG &DAG = DCI.DAG;
7352 auto *SN = cast<StoreSDNode>(N);
7353 auto &Op1 = N->getOperand(1);
7354 EVT MemVT = SN->getMemoryVT();
7355
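// As with loads, stores to the __ptr32 address space are rewritten with an
// address-space cast to the native pointer type.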
7356 if (SN->getAddressSpace() == SYSTEMZAS::PTR32) {
7357 MVT PtrVT = getPointerTy(DAG.getDataLayout());
7358 MVT StoreNodeVT = SN->getBasePtr().getSimpleValueType();
7359 if (PtrVT != StoreNodeVT) {
7360 SDLoc DL(SN);
7361 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(DL, PtrVT, SN->getBasePtr(),
7362 SYSTEMZAS::PTR32, 0);
7363 return DAG.getStore(SN->getChain(), DL, SN->getValue(), AddrSpaceCast,
7364 SN->getPointerInfo(), SN->getOriginalAlign(),
7365 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7366 }
7367 }
7368
7369 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
7370 // for the extraction to be done on a vMiN value, so that we can use VSTE.
7371 // If X has wider elements then convert it to:
7372 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
7373 if (MemVT.isInteger() && SN->isTruncatingStore()) {
7374 if (SDValue Value =
7375 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
7376 DCI.AddToWorklist(Value.getNode());
7377
7378 // Rewrite the store with the new form of stored value.
7379 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
7380 SN->getBasePtr(), SN->getMemoryVT(),
7381 SN->getMemOperand());
7382 }
7383 }
7384 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
7385 if (!SN->isTruncatingStore() &&
7386 Op1.getOpcode() == ISD::BSWAP &&
7387 Op1.getNode()->hasOneUse() &&
7388 canLoadStoreByteSwapped(Op1.getValueType())) {
7389
7390 SDValue BSwapOp = Op1.getOperand(0);
7391
7392 if (BSwapOp.getValueType() == MVT::i16)
7393 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
7394
7395 SDValue Ops[] = {
7396 N->getOperand(0), BSwapOp, N->getOperand(2)
7397 };
7398
7399 return
7400 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
7401 Ops, MemVT, SN->getMemOperand());
7402 }
7403 // Combine STORE (element-swap) into VSTER
7404 if (!SN->isTruncatingStore() &&
7405 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
7406 Op1.getNode()->hasOneUse() &&
7407 Subtarget.hasVectorEnhancements2()) {
7408 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
7409 ArrayRef<int> ShuffleMask = SVN->getMask();
7410 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
7411 SDValue Ops[] = {
7412 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
7413 };
7414
7415 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
7416 DAG.getVTList(MVT::Other),
7417 Ops, MemVT, SN->getMemOperand());
7418 }
7419 }
7420
7421 // Combine STORE (READCYCLECOUNTER) into STCKF.
7422 if (!SN->isTruncatingStore() &&
7423 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
7424 Op1.hasOneUse() &&
7425 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
7426 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
7427 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
7428 DAG.getVTList(MVT::Other),
7429 Ops, MemVT, SN->getMemOperand());
7430 }
7431
7432 // Transform a store of a 128-bit value moved from parts into two stores.
7433 if (SN->isSimple() && ISD::isNormalStore(SN)) {
7434 SDValue LoPart, HiPart;
7435 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
7436 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
7437 SDLoc DL(SN);
7438 SDValue Chain0 =
7439 DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),
7440 SN->getPointerInfo(), SN->getOriginalAlign(),
7441 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7442 SDValue Chain1 =
7443 DAG.getStore(SN->getChain(), DL, LoPart,
7444 DAG.getObjectPtrOffset(DL, SN->getBasePtr(),
7445 TypeSize::getFixed(8)),
7446 SN->getPointerInfo().getWithOffset(8),
7447 SN->getOriginalAlign(),
7448 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7449
7450 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
7451 }
7452 }
7453
7454 // Replicate a reg or immediate with VREP instead of scalar multiply or
7455 // immediate load. It seems best to do this during the first DAGCombine as
7456 // it is straightforward to handle the zero-extend node in the initial
7457 // DAG, and also not worry about keeping the new MemVT legal (e.g. when
7458 // extracting an i16 element from a v16i8 vector).
7459 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
7460 isOnlyUsedByStores(Op1, DAG)) {
7461 SDValue Word = SDValue();
7462 EVT WordVT;
7463
7464 // Find a replicated immediate and return it if found in Word and its
7465 // type in WordVT.
7466 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
7467 // Some constants are better handled with a scalar store.
7468 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
7469 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
7470 return;
7471
7472 APInt Val = C->getAPIntValue();
7473 // Truncate Val in case of a truncating store.
7474 if (!llvm::isUIntN(TotBytes * 8, Val.getZExtValue())) {
7475 assert(SN->isTruncatingStore() &&
7476 "Non-truncating store and immediate value does not fit?");
7477 Val = Val.trunc(TotBytes * 8);
7478 }
7479
7480 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, Val.getZExtValue()));
7481 if (VCI.isVectorConstantLegal(Subtarget) &&
7482 VCI.Opcode == SystemZISD::REPLICATE) {
7483 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
7484 WordVT = VCI.VecVT.getScalarType();
7485 }
7486 };
7487
7488 // Find a replicated register and return it if found in Word and its type
7489 // in WordVT.
7490 auto FindReplicatedReg = [&](SDValue MulOp) {
7491 EVT MulVT = MulOp.getValueType();
7492 if (MulOp->getOpcode() == ISD::MUL &&
7493 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
7494 // Find a zero extended value and its type.
7495 SDValue LHS = MulOp->getOperand(0);
7496 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
7497 WordVT = LHS->getOperand(0).getValueType();
7498 else if (LHS->getOpcode() == ISD::AssertZext)
7499 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
7500 else
7501 return;
7502 // Find a replicating constant, e.g. 0x00010001.
7503 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
7504 SystemZVectorConstantInfo VCI(
7505 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
7506 if (VCI.isVectorConstantLegal(Subtarget) &&
7507 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
7508 WordVT == VCI.VecVT.getScalarType())
7509 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
7510 }
7511 }
7512 };
7513
7514 if (isa<BuildVectorSDNode>(Op1) &&
7515 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
7516 SDValue SplatVal = Op1->getOperand(0);
7517 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
7518 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
7519 else
7520 FindReplicatedReg(SplatVal);
7521 } else {
7522 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
7523 FindReplicatedImm(C, MemVT.getStoreSize());
7524 else
7525 FindReplicatedReg(Op1);
7526 }
7527
7528 if (Word != SDValue()) {
7529 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
7530 "Bad type handling");
7531 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
7532 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
7533 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
7534 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
7535 SN->getBasePtr(), SN->getMemOperand());
7536 }
7537 }
7538
7539 return SDValue();
7540}
7541
7542SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
7543 SDNode *N, DAGCombinerInfo &DCI) const {
7544 SelectionDAG &DAG = DCI.DAG;
7545 // Combine element-swap (LOAD) into VLER
7546 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7547 N->getOperand(0).hasOneUse() &&
7548 Subtarget.hasVectorEnhancements2()) {
7549 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
7550 ArrayRef<int> ShuffleMask = SVN->getMask();
7551 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
7552 SDValue Load = N->getOperand(0);
7553 LoadSDNode *LD = cast<LoadSDNode>(Load);
7554
7555 // Create the element-swapping load.
7556 SDValue Ops[] = {
7557 LD->getChain(), // Chain
7558 LD->getBasePtr() // Ptr
7559 };
7560 SDValue ESLoad =
7561 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
7562 DAG.getVTList(LD->getValueType(0), MVT::Other),
7563 Ops, LD->getMemoryVT(), LD->getMemOperand());
7564
7565 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
7566 // by the load dead.
7567 DCI.CombineTo(N, ESLoad);
7568
7569 // Next, combine the load away; we give it a bogus result value but a real
7570 // chain result. The result value is dead because the shuffle is dead.
7571 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
7572
7573 // Return N so it doesn't get rechecked!
7574 return SDValue(N, 0);
7575 }
7576 }
7577
7578 return SDValue();
7579}
7580
7581SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
7582 SDNode *N, DAGCombinerInfo &DCI) const {
7583 SelectionDAG &DAG = DCI.DAG;
7584
7585 if (!Subtarget.hasVector())
7586 return SDValue();
7587
7588 // Look through bitcasts that retain the number of vector elements.
7589 SDValue Op = N->getOperand(0);
7590 if (Op.getOpcode() == ISD::BITCAST &&
7591 Op.getValueType().isVector() &&
7592 Op.getOperand(0).getValueType().isVector() &&
7593 Op.getValueType().getVectorNumElements() ==
7594 Op.getOperand(0).getValueType().getVectorNumElements())
7595 Op = Op.getOperand(0);
7596
7597 // Pull BSWAP out of a vector extraction.
7598 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
7599 EVT VecVT = Op.getValueType();
7600 EVT EltVT = VecVT.getVectorElementType();
7601 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
7602 Op.getOperand(0), N->getOperand(1));
7603 DCI.AddToWorklist(Op.getNode());
7604 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
7605 if (EltVT != N->getValueType(0)) {
7606 DCI.AddToWorklist(Op.getNode());
7607 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
7608 }
7609 return Op;
7610 }
7611
7612 // Try to simplify a vector extraction.
7613 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
7614 SDValue Op0 = N->getOperand(0);
7615 EVT VecVT = Op0.getValueType();
7616 if (canTreatAsByteVector(VecVT))
7617 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
7618 IndexN->getZExtValue(), DCI, false);
7619 }
7620 return SDValue();
7621}
7622
7623SDValue SystemZTargetLowering::combineJOIN_DWORDS(
7624 SDNode *N, DAGCombinerInfo &DCI) const {
7625 SelectionDAG &DAG = DCI.DAG;
7626 // (join_dwords X, X) == (replicate X)
7627 if (N->getOperand(0) == N->getOperand(1))
7628 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
7629 N->getOperand(0));
7630 return SDValue();
7631}
7632
7633 static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
7634 SDValue Chain1 = N1->getOperand(0);
7635 SDValue Chain2 = N2->getOperand(0);
7636
7637 // Trivial case: both nodes take the same chain.
7638 if (Chain1 == Chain2)
7639 return Chain1;
7640
7641 // FIXME - we could handle more complex cases via TokenFactor,
7642 // assuming we can verify that this would not create a cycle.
7643 return SDValue();
7644}
7645
7646SDValue SystemZTargetLowering::combineFP_ROUND(
7647 SDNode *N, DAGCombinerInfo &DCI) const {
7648
7649 if (!Subtarget.hasVector())
7650 return SDValue();
7651
7652 // (fpround (extract_vector_elt X 0))
7653 // (fpround (extract_vector_elt X 1)) ->
7654 // (extract_vector_elt (VROUND X) 0)
7655 // (extract_vector_elt (VROUND X) 2)
7656 //
7657 // This is a special case since the target doesn't really support v2f32s.
7658 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7659 SelectionDAG &DAG = DCI.DAG;
7660 SDValue Op0 = N->getOperand(OpNo);
7661 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
7662 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7663 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
7664 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7665 Op0.getConstantOperandVal(1) == 0) {
7666 SDValue Vec = Op0.getOperand(0);
7667 for (auto *U : Vec->users()) {
7668 if (U != Op0.getNode() && U->hasOneUse() &&
7669 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7670 U->getOperand(0) == Vec &&
7671 U->getOperand(1).getOpcode() == ISD::Constant &&
7672 U->getConstantOperandVal(1) == 1) {
7673 SDValue OtherRound = SDValue(*U->user_begin(), 0);
7674 if (OtherRound.getOpcode() == N->getOpcode() &&
7675 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
7676 OtherRound.getValueType() == MVT::f32) {
7677 SDValue VRound, Chain;
7678 if (N->isStrictFPOpcode()) {
7679 Chain = MergeInputChains(N, OtherRound.getNode());
7680 if (!Chain)
7681 continue;
7682 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
7683 {MVT::v4f32, MVT::Other}, {Chain, Vec});
7684 Chain = VRound.getValue(1);
7685 } else
7686 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
7687 MVT::v4f32, Vec);
7688 DCI.AddToWorklist(VRound.getNode());
7689 SDValue Extract1 =
7690 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
7691 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
7692 DCI.AddToWorklist(Extract1.getNode());
7693 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
7694 if (Chain)
7695 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
7696 SDValue Extract0 =
7697 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
7698 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7699 if (Chain)
7700 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7701 N->getVTList(), Extract0, Chain);
7702 return Extract0;
7703 }
7704 }
7705 }
7706 }
7707 return SDValue();
7708}
7709
7710SDValue SystemZTargetLowering::combineFP_EXTEND(
7711 SDNode *N, DAGCombinerInfo &DCI) const {
7712
7713 if (!Subtarget.hasVector())
7714 return SDValue();
7715
7716 // (fpextend (extract_vector_elt X 0))
7717 // (fpextend (extract_vector_elt X 2)) ->
7718 // (extract_vector_elt (VEXTEND X) 0)
7719 // (extract_vector_elt (VEXTEND X) 1)
7720 //
7721 // This is a special case since the target doesn't really support v2f32s.
7722 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7723 SelectionDAG &DAG = DCI.DAG;
7724 SDValue Op0 = N->getOperand(OpNo);
7725 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
7726 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7727 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
7728 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7729 Op0.getConstantOperandVal(1) == 0) {
7730 SDValue Vec = Op0.getOperand(0);
7731 for (auto *U : Vec->users()) {
7732 if (U != Op0.getNode() && U->hasOneUse() &&
7733 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7734 U->getOperand(0) == Vec &&
7735 U->getOperand(1).getOpcode() == ISD::Constant &&
7736 U->getConstantOperandVal(1) == 2) {
7737 SDValue OtherExtend = SDValue(*U->user_begin(), 0);
7738 if (OtherExtend.getOpcode() == N->getOpcode() &&
7739 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
7740 OtherExtend.getValueType() == MVT::f64) {
7741 SDValue VExtend, Chain;
7742 if (N->isStrictFPOpcode()) {
7743 Chain = MergeInputChains(N, OtherExtend.getNode());
7744 if (!Chain)
7745 continue;
7746 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
7747 {MVT::v2f64, MVT::Other}, {Chain, Vec});
7748 Chain = VExtend.getValue(1);
7749 } else
7750 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
7751 MVT::v2f64, Vec);
7752 DCI.AddToWorklist(VExtend.getNode());
7753 SDValue Extract1 =
7754 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
7755 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
7756 DCI.AddToWorklist(Extract1.getNode());
7757 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
7758 if (Chain)
7759 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
7760 SDValue Extract0 =
7761 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
7762 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7763 if (Chain)
7764 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7765 N->getVTList(), Extract0, Chain);
7766 return Extract0;
7767 }
7768 }
7769 }
7770 }
7771 return SDValue();
7772}
7773
7774SDValue SystemZTargetLowering::combineINT_TO_FP(
7775 SDNode *N, DAGCombinerInfo &DCI) const {
7776 if (DCI.Level != BeforeLegalizeTypes)
7777 return SDValue();
7778 SelectionDAG &DAG = DCI.DAG;
7779 LLVMContext &Ctx = *DAG.getContext();
7780 unsigned Opcode = N->getOpcode();
7781 EVT OutVT = N->getValueType(0);
7782 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
7783 SDValue Op = N->getOperand(0);
7784 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
7785 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
7786
7787 // Insert an extension before type-legalization to avoid scalarization, e.g.:
7788 // v2f64 = uint_to_fp v2i16
7789 // =>
7790 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
7791 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
7792 OutScalarBits <= 64) {
7793 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
7794 EVT ExtVT = EVT::getVectorVT(
7795 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
7796 unsigned ExtOpcode =
7797 (Opcode == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND);
7798 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
7799 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
7800 }
7801 return SDValue();
7802}
7803
7804SDValue SystemZTargetLowering::combineBSWAP(
7805 SDNode *N, DAGCombinerInfo &DCI) const {
7806 SelectionDAG &DAG = DCI.DAG;
7807 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
7808 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7809 N->getOperand(0).hasOneUse() &&
7810 canLoadStoreByteSwapped(N->getValueType(0))) {
7811 SDValue Load = N->getOperand(0);
7812 LoadSDNode *LD = cast<LoadSDNode>(Load);
7813
7814 // Create the byte-swapping load.
7815 SDValue Ops[] = {
7816 LD->getChain(), // Chain
7817 LD->getBasePtr() // Ptr
7818 };
7819 EVT LoadVT = N->getValueType(0);
7820 if (LoadVT == MVT::i16)
7821 LoadVT = MVT::i32;
7822 SDValue BSLoad =
7823 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
7824 DAG.getVTList(LoadVT, MVT::Other),
7825 Ops, LD->getMemoryVT(), LD->getMemOperand());
7826
7827 // If this is an i16 load, insert the truncate.
7828 SDValue ResVal = BSLoad;
7829 if (N->getValueType(0) == MVT::i16)
7830 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
7831
7832 // First, combine the bswap away. This makes the value produced by the
7833 // load dead.
7834 DCI.CombineTo(N, ResVal);
7835
7836 // Next, combine the load away; we give it a bogus result value but a real
7837 // chain result. The result value is dead because the bswap is dead.
7838 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
7839
7840 // Return N so it doesn't get rechecked!
7841 return SDValue(N, 0);
7842 }
7843
7844 // Look through bitcasts that retain the number of vector elements.
7845 SDValue Op = N->getOperand(0);
7846 if (Op.getOpcode() == ISD::BITCAST &&
7847 Op.getValueType().isVector() &&
7848 Op.getOperand(0).getValueType().isVector() &&
7849 Op.getValueType().getVectorNumElements() ==
7850 Op.getOperand(0).getValueType().getVectorNumElements())
7851 Op = Op.getOperand(0);
7852
7853 // Push BSWAP into a vector insertion if at least one side then simplifies.
7854 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
7855 SDValue Vec = Op.getOperand(0);
7856 SDValue Elt = Op.getOperand(1);
7857 SDValue Idx = Op.getOperand(2);
7858
7859 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
7860 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
7861 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
7862 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
7863 (canLoadStoreByteSwapped(N->getValueType(0)) &&
7864 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
7865 EVT VecVT = N->getValueType(0);
7866 EVT EltVT = N->getValueType(0).getVectorElementType();
7867 if (VecVT != Vec.getValueType()) {
7868 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
7869 DCI.AddToWorklist(Vec.getNode());
7870 }
7871 if (EltVT != Elt.getValueType()) {
7872 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
7873 DCI.AddToWorklist(Elt.getNode());
7874 }
7875 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
7876 DCI.AddToWorklist(Vec.getNode());
7877 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
7878 DCI.AddToWorklist(Elt.getNode());
7879 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
7880 Vec, Elt, Idx);
7881 }
7882 }
7883
7884 // Push BSWAP into a vector shuffle if at least one side then simplifies.
7885 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
7886 if (SV && Op.hasOneUse()) {
7887 SDValue Op0 = Op.getOperand(0);
7888 SDValue Op1 = Op.getOperand(1);
7889
7890 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
7891 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
7892 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
7893 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
7894 EVT VecVT = N->getValueType(0);
7895 if (VecVT != Op0.getValueType()) {
7896 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
7897 DCI.AddToWorklist(Op0.getNode());
7898 }
7899 if (VecVT != Op1.getValueType()) {
7900 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
7901 DCI.AddToWorklist(Op1.getNode());
7902 }
7903 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
7904 DCI.AddToWorklist(Op0.getNode());
7905 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
7906 DCI.AddToWorklist(Op1.getNode());
7907 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
7908 }
7909 }
7910
7911 return SDValue();
7912}
7913
7914static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
7915 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
7916 // set by the CCReg instruction using the CCValid / CCMask masks. If
7917 // the CCReg instruction is itself an ICMP testing the condition
7918 // code set by some other instruction, see whether we can directly
7919 // use that condition code.
7920
7921 // Verify that we have an ICMP against some constant.
7922 if (CCValid != SystemZ::CCMASK_ICMP)
7923 return false;
7924 auto *ICmp = CCReg.getNode();
7925 if (ICmp->getOpcode() != SystemZISD::ICMP)
7926 return false;
7927 auto *CompareLHS = ICmp->getOperand(0).getNode();
7928 auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
7929 if (!CompareRHS)
7930 return false;
7931
7932 // Optimize the case where CompareLHS is a SELECT_CCMASK.
7933 if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
7934 // Verify that we have an appropriate mask for an EQ or NE comparison.
7935 bool Invert = false;
7936 if (CCMask == SystemZ::CCMASK_CMP_NE)
7937 Invert = !Invert;
7938 else if (CCMask != SystemZ::CCMASK_CMP_EQ)
7939 return false;
7940
7941 // Verify that the ICMP compares against one of the select values.
7942 auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
7943 if (!TrueVal)
7944 return false;
7945 auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7946 if (!FalseVal)
7947 return false;
7948 if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
7949 Invert = !Invert;
7950 else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
7951 return false;
7952
7953 // Compute the effective CC mask for the new branch or select.
7954 auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
7955 auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
7956 if (!NewCCValid || !NewCCMask)
7957 return false;
7958 CCValid = NewCCValid->getZExtValue();
7959 CCMask = NewCCMask->getZExtValue();
7960 if (Invert)
7961 CCMask ^= CCValid;
7962
7963 // Return the updated CCReg link.
7964 CCReg = CompareLHS->getOperand(4);
7965 return true;
7966 }
7967
7968 // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
7969 if (CompareLHS->getOpcode() == ISD::SRA) {
7970 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7971 if (!SRACount || SRACount->getZExtValue() != 30)
7972 return false;
7973 auto *SHL = CompareLHS->getOperand(0).getNode();
7974 if (SHL->getOpcode() != ISD::SHL)
7975 return false;
7976 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
7977 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
7978 return false;
7979 auto *IPM = SHL->getOperand(0).getNode();
7980 if (IPM->getOpcode() != SystemZISD::IPM)
7981 return false;
7982
7983 // Avoid introducing CC spills (because SRA would clobber CC).
7984 if (!CompareLHS->hasOneUse())
7985 return false;
7986 // Verify that the ICMP compares against zero.
7987 if (CompareRHS->getZExtValue() != 0)
7988 return false;
7989
7990 // Compute the effective CC mask for the new branch or select.
7991 CCMask = SystemZ::reverseCCMask(CCMask);
7992
7993 // Return the updated CCReg link.
7994 CCReg = IPM->getOperand(0);
7995 return true;
7996 }
7997
7998 return false;
7999}
8000
8001SDValue SystemZTargetLowering::combineBR_CCMASK(
8002 SDNode *N, DAGCombinerInfo &DCI) const {
8003 SelectionDAG &DAG = DCI.DAG;
8004
8005 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
8006 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
8007 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
8008 if (!CCValid || !CCMask)
8009 return SDValue();
8010
8011 int CCValidVal = CCValid->getZExtValue();
8012 int CCMaskVal = CCMask->getZExtValue();
8013 SDValue Chain = N->getOperand(0);
8014 SDValue CCReg = N->getOperand(4);
8015
8016 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
8017 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
8018 Chain,
8019 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
8020 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
8021 N->getOperand(3), CCReg);
8022 return SDValue();
8023}
8024
8025SDValue SystemZTargetLowering::combineSELECT_CCMASK(
8026 SDNode *N, DAGCombinerInfo &DCI) const {
8027 SelectionDAG &DAG = DCI.DAG;
8028
8029 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
8030 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
8031 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
8032 if (!CCValid || !CCMask)
8033 return SDValue();
8034
8035 int CCValidVal = CCValid->getZExtValue();
8036 int CCMaskVal = CCMask->getZExtValue();
8037 SDValue CCReg = N->getOperand(4);
8038
8039 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
8040 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
8041 N->getOperand(0), N->getOperand(1),
8042 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
8043 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
8044 CCReg);
8045 return SDValue();
8046}
8047
8048
8049SDValue SystemZTargetLowering::combineGET_CCMASK(
8050 SDNode *N, DAGCombinerInfo &DCI) const {
8051
8052 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
8053 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
8054 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
8055 if (!CCValid || !CCMask)
8056 return SDValue();
8057 int CCValidVal = CCValid->getZExtValue();
8058 int CCMaskVal = CCMask->getZExtValue();
8059
8060 SDValue Select = N->getOperand(0);
8061 if (Select->getOpcode() == ISD::TRUNCATE)
8062 Select = Select->getOperand(0);
8063 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
8064 return SDValue();
8065
8066 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
8067 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
8068 if (!SelectCCValid || !SelectCCMask)
8069 return SDValue();
8070 int SelectCCValidVal = SelectCCValid->getZExtValue();
8071 int SelectCCMaskVal = SelectCCMask->getZExtValue();
8072
8073 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
8074 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
8075 if (!TrueVal || !FalseVal)
8076 return SDValue();
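// A (1, 0) select reproduces the CC test directly; a (0, 1) select is the
// inverted test, which is folded in by flipping the select's CC mask.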
8077 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
8078 ;
8079 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
8080 SelectCCMaskVal ^= SelectCCValidVal;
8081 else
8082 return SDValue();
8083
8084 if (SelectCCValidVal & ~CCValidVal)
8085 return SDValue();
8086 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
8087 return SDValue();
8088
8089 return Select->getOperand(4);
8090}
8091
8092SDValue SystemZTargetLowering::combineIntDIVREM(
8093 SDNode *N, DAGCombinerInfo &DCI) const {
8094 SelectionDAG &DAG = DCI.DAG;
8095 EVT VT = N->getValueType(0);
8096 // In the case where the divisor is a vector of constants, a cheaper
8097 // sequence of instructions can replace the divide. BuildSDIV is called to
8098 // do this during DAG combining, but it only succeeds when it can build a
8099 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
8100 // since it is not Legal but Custom it can only happen before
8101 // legalization. Therefore we must scalarize this early, before Combine 1.
8102 // For widened vectors, this is already the result of type legalization.
8103 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
8104 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
8105 return DAG.UnrollVectorOp(N);
8106 return SDValue();
8107}
8108
8109SDValue SystemZTargetLowering::combineINTRINSIC(
8110 SDNode *N, DAGCombinerInfo &DCI) const {
8111 SelectionDAG &DAG = DCI.DAG;
8112
8113 unsigned Id = N->getConstantOperandVal(1);
8114 switch (Id) {
8115 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
8116 // or larger is simply a vector load.
8117 case Intrinsic::s390_vll:
8118 case Intrinsic::s390_vlrl:
8119 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
8120 if (C->getZExtValue() >= 15)
8121 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
8122 N->getOperand(3), MachinePointerInfo());
8123 break;
8124 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
8125 case Intrinsic::s390_vstl:
8126 case Intrinsic::s390_vstrl:
8127 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
8128 if (C->getZExtValue() >= 15)
8129 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
8130 N->getOperand(4), MachinePointerInfo());
8131 break;
8132 }
8133
8134 return SDValue();
8135}
8136
8137SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
8138 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
8139 return N->getOperand(0);
8140 return N;
8141}
8142
8143 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
8144 DAGCombinerInfo &DCI) const {
8145 switch(N->getOpcode()) {
8146 default: break;
8147 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
8148 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
8149 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
8150 case SystemZISD::MERGE_HIGH:
8151 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
8152 case ISD::LOAD: return combineLOAD(N, DCI);
8153 case ISD::STORE: return combineSTORE(N, DCI);
8154 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
8155 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
8156 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
8157 case ISD::STRICT_FP_ROUND:
8158 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
8159 case ISD::STRICT_FP_EXTEND:
8160 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
8161 case ISD::SINT_TO_FP:
8162 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
8163 case ISD::BSWAP: return combineBSWAP(N, DCI);
8164 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
8165 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
8166 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
8167 case ISD::SDIV:
8168 case ISD::UDIV:
8169 case ISD::SREM:
8170 case ISD::UREM: return combineIntDIVREM(N, DCI);
8171 case ISD::INTRINSIC_W_CHAIN:
8172 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
8173 }
8174
8175 return SDValue();
8176}
8177
8178// Return the demanded elements for the OpNo source operand of Op. DemandedElts
8179// are for Op.
8180static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
8181 unsigned OpNo) {
8182 EVT VT = Op.getValueType();
8183 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
8184 APInt SrcDemE;
8185 unsigned Opcode = Op.getOpcode();
8186 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
8187 unsigned Id = Op.getConstantOperandVal(0);
8188 switch (Id) {
8189 case Intrinsic::s390_vpksh: // PACKS
8190 case Intrinsic::s390_vpksf:
8191 case Intrinsic::s390_vpksg:
8192 case Intrinsic::s390_vpkshs: // PACKS_CC
8193 case Intrinsic::s390_vpksfs:
8194 case Intrinsic::s390_vpksgs:
8195 case Intrinsic::s390_vpklsh: // PACKLS
8196 case Intrinsic::s390_vpklsf:
8197 case Intrinsic::s390_vpklsg:
8198 case Intrinsic::s390_vpklshs: // PACKLS_CC
8199 case Intrinsic::s390_vpklsfs:
8200 case Intrinsic::s390_vpklsgs:
8201 // VECTOR PACK truncates the elements of two source vectors into one.
8202 SrcDemE = DemandedElts;
8203 if (OpNo == 2)
8204 SrcDemE.lshrInPlace(NumElts / 2);
8205 SrcDemE = SrcDemE.trunc(NumElts / 2);
8206 break;
8207 // VECTOR UNPACK extends half the elements of the source vector.
8208 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8209 case Intrinsic::s390_vuphh:
8210 case Intrinsic::s390_vuphf:
8211 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
8212 case Intrinsic::s390_vuplhh:
8213 case Intrinsic::s390_vuplhf:
8214 SrcDemE = APInt(NumElts * 2, 0);
8215 SrcDemE.insertBits(DemandedElts, 0);
8216 break;
8217 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8218 case Intrinsic::s390_vuplhw:
8219 case Intrinsic::s390_vuplf:
8220 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
8221 case Intrinsic::s390_vupllh:
8222 case Intrinsic::s390_vupllf:
8223 SrcDemE = APInt(NumElts * 2, 0);
8224 SrcDemE.insertBits(DemandedElts, NumElts);
8225 break;
8226 case Intrinsic::s390_vpdi: {
8227 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
8228 SrcDemE = APInt(NumElts, 0);
8229 if (!DemandedElts[OpNo - 1])
8230 break;
8231 unsigned Mask = Op.getConstantOperandVal(3);
8232 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
8233 // Demand input element 0 or 1, given by the mask bit value.
8234 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
8235 break;
8236 }
8237 case Intrinsic::s390_vsldb: {
8238 // VECTOR SHIFT LEFT DOUBLE BY BYTE
8239 assert(VT == MVT::v16i8 && "Unexpected type.");
8240 unsigned FirstIdx = Op.getConstantOperandVal(3);
8241 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
8242 unsigned NumSrc0Els = 16 - FirstIdx;
8243 SrcDemE = APInt(NumElts, 0);
8244 if (OpNo == 1) {
8245 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
8246 SrcDemE.insertBits(DemEls, FirstIdx);
8247 } else {
8248 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
8249 SrcDemE.insertBits(DemEls, 0);
8250 }
8251 break;
8252 }
8253 case Intrinsic::s390_vperm:
8254 SrcDemE = APInt::getAllOnes(NumElts);
8255 break;
8256 default:
8257 llvm_unreachable("Unhandled intrinsic.");
8258 break;
8259 }
8260 } else {
8261 switch (Opcode) {
8262 case SystemZISD::JOIN_DWORDS:
8263 // Scalar operand.
8264 SrcDemE = APInt(1, 1);
8265 break;
8266 case SystemZISD::SELECT_CCMASK:
8267 SrcDemE = DemandedElts;
8268 break;
8269 default:
8270 llvm_unreachable("Unhandled opcode.");
8271 break;
8272 }
8273 }
8274 return SrcDemE;
8275}
8276
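// Compute the known bits of a binary operation by intersecting what is
// known about each source operand, restricted to the demanded elements.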
8277static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
8278 const APInt &DemandedElts,
8279 const SelectionDAG &DAG, unsigned Depth,
8280 unsigned OpNo) {
8281 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
8282 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
8283 KnownBits LHSKnown =
8284 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
8285 KnownBits RHSKnown =
8286 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
8287 Known = LHSKnown.intersectWith(RHSKnown);
8288}
8289
8290void
8291 SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
8292 KnownBits &Known,
8293 const APInt &DemandedElts,
8294 const SelectionDAG &DAG,
8295 unsigned Depth) const {
8296 Known.resetAll();
8297
8298 // Intrinsic CC result is returned in the two low bits.
8299 unsigned tmp0, tmp1; // not used
8300 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
8301 Known.Zero.setBitsFrom(2);
8302 return;
8303 }
8304 EVT VT = Op.getValueType();
8305 if (Op.getResNo() != 0 || VT == MVT::Untyped)
8306 return;
8307 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
8308 "KnownBits does not match VT in bitwidth");
8309 assert ((!VT.isVector() ||
8310 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
8311 "DemandedElts does not match VT number of elements");
8312 unsigned BitWidth = Known.getBitWidth();
8313 unsigned Opcode = Op.getOpcode();
8314 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
8315 bool IsLogical = false;
8316 unsigned Id = Op.getConstantOperandVal(0);
8317 switch (Id) {
8318 case Intrinsic::s390_vpksh: // PACKS
8319 case Intrinsic::s390_vpksf:
8320 case Intrinsic::s390_vpksg:
8321 case Intrinsic::s390_vpkshs: // PACKS_CC
8322 case Intrinsic::s390_vpksfs:
8323 case Intrinsic::s390_vpksgs:
8324 case Intrinsic::s390_vpklsh: // PACKLS
8325 case Intrinsic::s390_vpklsf:
8326 case Intrinsic::s390_vpklsg:
8327 case Intrinsic::s390_vpklshs: // PACKLS_CC
8328 case Intrinsic::s390_vpklsfs:
8329 case Intrinsic::s390_vpklsgs:
8330 case Intrinsic::s390_vpdi:
8331 case Intrinsic::s390_vsldb:
8332 case Intrinsic::s390_vperm:
8333 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
8334 break;
8335 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
8336 case Intrinsic::s390_vuplhh:
8337 case Intrinsic::s390_vuplhf:
8338 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
8339 case Intrinsic::s390_vupllh:
8340 case Intrinsic::s390_vupllf:
8341 IsLogical = true;
8342 [[fallthrough]];
8343 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8344 case Intrinsic::s390_vuphh:
8345 case Intrinsic::s390_vuphf:
8346 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8347 case Intrinsic::s390_vuplhw:
8348 case Intrinsic::s390_vuplf: {
8349 SDValue SrcOp = Op.getOperand(1);
8350 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
8351 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
8352 if (IsLogical) {
8353 Known = Known.zext(BitWidth);
8354 } else
8355 Known = Known.sext(BitWidth);
8356 break;
8357 }
8358 default:
8359 break;
8360 }
8361 } else {
8362 switch (Opcode) {
8363 case SystemZISD::JOIN_DWORDS:
8364 case SystemZISD::SELECT_CCMASK:
8365 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
8366 break;
8367 case SystemZISD::REPLICATE: {
8368 SDValue SrcOp = Op.getOperand(0);
8369 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
8370 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
8371 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
8372 break;
8373 }
8374 default:
8375 break;
8376 }
8377 }
8378
8379 // Known has the width of the source operand(s). Adjust if needed to match
8380 // the passed bitwidth.
8381 if (Known.getBitWidth() != BitWidth)
8382 Known = Known.anyextOrTrunc(BitWidth);
8383}
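// Example of the logical/arithmetic split above (illustrative): for
// s390_vuplhb each i8 element is unpacked into an i16 lane, so the i8
// KnownBits are zero-extended and the top 8 bits become known zero, while
// for s390_vuphb they are sign-extended, so the top 8 bits are known only
// if the source's sign bit is known.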
8384
8385static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
8386 const SelectionDAG &DAG, unsigned Depth,
8387 unsigned OpNo) {
8388 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
8389 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
8390 if (LHS == 1) return 1; // Early out.
8391 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
8392 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
8393 if (RHS == 1) return 1; // Early out.
8394 unsigned Common = std::min(LHS, RHS);
8395 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
8396 EVT VT = Op.getValueType();
8397 unsigned VTBits = VT.getScalarSizeInBits();
8398 if (SrcBitWidth > VTBits) { // PACK
8399 unsigned SrcExtraBits = SrcBitWidth - VTBits;
8400 if (Common > SrcExtraBits)
8401 return (Common - SrcExtraBits);
8402 return 1;
8403 }
8404 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
8405 return Common;
8406}
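// A worked instance of the PACK adjustment above (numbers are
// hypothetical): packing i32 elements into i16 drops
// SrcExtraBits = 32 - 16 = 16 high bits, so a source with Common = 20 sign
// bits yields 20 - 16 = 4 sign bits in the result, and anything with
// Common <= 16 conservatively degrades to 1.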
8407
8408unsigned
8409 SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
8410 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
8411 unsigned Depth) const {
8412 if (Op.getResNo() != 0)
8413 return 1;
8414 unsigned Opcode = Op.getOpcode();
8415 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
8416 unsigned Id = Op.getConstantOperandVal(0);
8417 switch (Id) {
8418 case Intrinsic::s390_vpksh: // PACKS
8419 case Intrinsic::s390_vpksf:
8420 case Intrinsic::s390_vpksg:
8421 case Intrinsic::s390_vpkshs: // PACKS_CC
8422 case Intrinsic::s390_vpksfs:
8423 case Intrinsic::s390_vpksgs:
8424 case Intrinsic::s390_vpklsh: // PACKLS
8425 case Intrinsic::s390_vpklsf:
8426 case Intrinsic::s390_vpklsg:
8427 case Intrinsic::s390_vpklshs: // PACKLS_CC
8428 case Intrinsic::s390_vpklsfs:
8429 case Intrinsic::s390_vpklsgs:
8430 case Intrinsic::s390_vpdi:
8431 case Intrinsic::s390_vsldb:
8432 case Intrinsic::s390_vperm:
8433 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
8434 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8435 case Intrinsic::s390_vuphh:
8436 case Intrinsic::s390_vuphf:
8437 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8438 case Intrinsic::s390_vuplhw:
8439 case Intrinsic::s390_vuplf: {
8440 SDValue PackedOp = Op.getOperand(1);
8441 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
8442 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
8443 EVT VT = Op.getValueType();
8444 unsigned VTBits = VT.getScalarSizeInBits();
8445 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
8446 return Tmp;
8447 }
8448 default:
8449 break;
8450 }
8451 } else {
8452 switch (Opcode) {
8453 case SystemZISD::SELECT_CCMASK:
8454 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
8455 default:
8456 break;
8457 }
8458 }
8459
8460 return 1;
8461}
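// For instance (illustrative): s390_vuphb unpacks i8 elements into i16
// lanes, so a source element with 3 known sign bits yields
// 3 + (16 - 8) = 11 sign bits in the widened element, which is exactly the
// Tmp += VTBits - PackedOp.getScalarValueSizeInBits() step above.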
8462
8463 bool
8464 SystemZTargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op,
8465 const APInt &DemandedElts, const SelectionDAG &DAG,
8466 bool PoisonOnly, unsigned Depth) const {
8467 switch (Op->getOpcode()) {
8468 case SystemZISD::PCREL_WRAPPER:
8469 case SystemZISD::PCREL_OFFSET:
8470 return true;
8471 }
8472 return false;
8473}
8474
8475unsigned
8476 SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
8477 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
8478 unsigned StackAlign = TFI->getStackAlignment();
8479 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
8480 "Unexpected stack alignment");
8481 // The default stack probe size is 4096 if the function has no
8482 // stack-probe-size attribute.
8483 unsigned StackProbeSize =
8484 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
8485 // Round down to the stack alignment.
8486 StackProbeSize &= ~(StackAlign - 1);
8487 return StackProbeSize ? StackProbeSize : StackAlign;
8488}
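// Worked example (hypothetical values): with StackAlign == 8, a
// "stack-probe-size"="4100" attribute rounds down to 4096, while an
// attribute smaller than the alignment (say 4) rounds down to 0 and the
// function then falls back to returning StackAlign itself.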
8489
8490//===----------------------------------------------------------------------===//
8491// Custom insertion
8492//===----------------------------------------------------------------------===//
8493
8494// Force base value Base into a register before MI. Return the register.
8495 static Register forceReg(MachineInstr &MI, MachineOperand &Base,
8496 const SystemZInstrInfo *TII) {
8497 MachineBasicBlock *MBB = MI.getParent();
8498 MachineFunction &MF = *MBB->getParent();
8499 MachineRegisterInfo &MRI = MF.getRegInfo();
8500
8501 if (Base.isReg()) {
8502 // Copy Base into a new virtual register to help register coalescing in
8503 // cases with multiple uses.
8504 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8505 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
8506 .add(Base);
8507 return Reg;
8508 }
8509
8510 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8511 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
8512 .add(Base)
8513 .addImm(0)
8514 .addReg(0);
8515 return Reg;
8516}
8517
8518// The CC operand of MI might be missing a kill marker because there
8519// were multiple uses of CC, and ISel didn't know which to mark.
8520// Figure out whether MI should have had a kill marker.
8521 static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
8522 // Scan forward through BB for a use/def of CC.
8523 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
8524 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
8525 const MachineInstr& mi = *miI;
8526 if (mi.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
8527 return false;
8528 if (mi.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
8529 break; // Should have kill-flag - update below.
8530 }
8531
8532 // If we hit the end of the block, check whether CC is live into a
8533 // successor.
8534 if (miI == MBB->end()) {
8535 for (const MachineBasicBlock *Succ : MBB->successors())
8536 if (Succ->isLiveIn(SystemZ::CC))
8537 return false;
8538 }
8539
8540 return true;
8541}
8542
8543// Return true if it is OK for this Select pseudo-opcode to be cascaded
8544// together with other Select pseudo-opcodes into a single basic-block with
8545// a conditional jump around it.
8546 static bool isSelectPseudo(MachineInstr &MI) {
8547 switch (MI.getOpcode()) {
8548 case SystemZ::Select32:
8549 case SystemZ::Select64:
8550 case SystemZ::Select128:
8551 case SystemZ::SelectF32:
8552 case SystemZ::SelectF64:
8553 case SystemZ::SelectF128:
8554 case SystemZ::SelectVR32:
8555 case SystemZ::SelectVR64:
8556 case SystemZ::SelectVR128:
8557 return true;
8558
8559 default:
8560 return false;
8561 }
8562}
8563
8564// Helper function, which inserts PHI functions into SinkMBB:
8565// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
8566// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
8567 static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
8568 MachineBasicBlock *TrueMBB,
8569 MachineBasicBlock *FalseMBB,
8570 MachineBasicBlock *SinkMBB) {
8571 MachineFunction *MF = TrueMBB->getParent();
8572 const SystemZInstrInfo *TII = MF->getSubtarget<SystemZSubtarget>().getInstrInfo();
8573
8574 MachineInstr *FirstMI = Selects.front();
8575 unsigned CCValid = FirstMI->getOperand(3).getImm();
8576 unsigned CCMask = FirstMI->getOperand(4).getImm();
8577
8578 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
8579
8580 // As we are creating the PHIs, we have to be careful if there is more than
8581 // one. Later Selects may reference the results of earlier Selects, but later
8582 // PHIs have to reference the individual true/false inputs from earlier PHIs.
8583 // That also means that PHI construction must work forward from earlier to
8584 // later, and that the code must maintain a mapping from earlier PHI's
8585 // destination registers, and the registers that went into the PHI.
8586 DenseMap<Register, std::pair<Register, Register>> RegRewriteTable;
8587
8588 for (auto *MI : Selects) {
8589 Register DestReg = MI->getOperand(0).getReg();
8590 Register TrueReg = MI->getOperand(1).getReg();
8591 Register FalseReg = MI->getOperand(2).getReg();
8592
8593 // If this Select we are generating is the opposite condition from
8594 // the jump we generated, then we have to swap the operands for the
8595 // PHI that is going to be generated.
8596 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
8597 std::swap(TrueReg, FalseReg);
8598
8599 if (auto It = RegRewriteTable.find(TrueReg); It != RegRewriteTable.end())
8600 TrueReg = It->second.first;
8601
8602 if (auto It = RegRewriteTable.find(FalseReg); It != RegRewriteTable.end())
8603 FalseReg = It->second.second;
8604
8605 DebugLoc DL = MI->getDebugLoc();
8606 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
8607 .addReg(TrueReg).addMBB(TrueMBB)
8608 .addReg(FalseReg).addMBB(FalseMBB);
8609
8610 // Add this PHI to the rewrite table.
8611 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
8612 }
8613
8614 MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
8615}
8616
8617 MachineBasicBlock *
8618SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
8619 MachineBasicBlock *BB) const {
8620 MachineFunction &MF = *BB->getParent();
8621 MachineFrameInfo &MFI = MF.getFrameInfo();
8622 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
8623 assert(TFL->hasReservedCallFrame(MF) &&
8624 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
8625 (void)TFL;
8626 // Get the MaxCallFrameSize value and erase MI since it serves no further
8627 // purpose as the call frame is statically reserved in the prolog. Set
8628 // AdjustsStack as MI is *not* mapped as a frame instruction.
8629 uint32_t NumBytes = MI.getOperand(0).getImm();
8630 if (NumBytes > MFI.getMaxCallFrameSize())
8631 MFI.setMaxCallFrameSize(NumBytes);
8632 MFI.setAdjustsStack(true);
8633
8634 MI.eraseFromParent();
8635 return BB;
8636}
8637
8638// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
8639 MachineBasicBlock *
8640SystemZTargetLowering::emitSelect(MachineInstr &MI,
8641 MachineBasicBlock *MBB) const {
8642 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
8643 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8644
8645 unsigned CCValid = MI.getOperand(3).getImm();
8646 unsigned CCMask = MI.getOperand(4).getImm();
8647
8648 // If we have a sequence of Select* pseudo instructions using the
8649 // same condition code value, we want to expand all of them into
8650 // a single pair of basic blocks using the same condition.
8651 SmallVector<MachineInstr*, 8> Selects;
8652 SmallVector<MachineInstr*, 8> DbgValues;
8653 Selects.push_back(&MI);
8654 unsigned Count = 0;
8655 for (MachineInstr &NextMI : llvm::make_range(
8656 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
8657 if (isSelectPseudo(NextMI)) {
8658 assert(NextMI.getOperand(3).getImm() == CCValid &&
8659 "Bad CCValid operands since CC was not redefined.");
8660 if (NextMI.getOperand(4).getImm() == CCMask ||
8661 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
8662 Selects.push_back(&NextMI);
8663 continue;
8664 }
8665 break;
8666 }
8667 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
8668 NextMI.usesCustomInsertionHook())
8669 break;
8670 bool User = false;
8671 for (auto *SelMI : Selects)
8672 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
8673 User = true;
8674 break;
8675 }
8676 if (NextMI.isDebugInstr()) {
8677 if (User) {
8678 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
8679 DbgValues.push_back(&NextMI);
8680 }
8681 } else if (User || ++Count > 20)
8682 break;
8683 }
8684
8685 MachineInstr *LastMI = Selects.back();
8686 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
8687 checkCCKill(*LastMI, MBB));
8688 MachineBasicBlock *StartMBB = MBB;
8689 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
8690 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8691
8692 // Unless CC was killed in the last Select instruction, mark it as
8693 // live-in to both FalseMBB and JoinMBB.
8694 if (!CCKilled) {
8695 FalseMBB->addLiveIn(SystemZ::CC);
8696 JoinMBB->addLiveIn(SystemZ::CC);
8697 }
8698
8699 // StartMBB:
8700 // BRC CCMask, JoinMBB
8701 // # fallthrough to FalseMBB
8702 MBB = StartMBB;
8703 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8704 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8705 MBB->addSuccessor(JoinMBB);
8706 MBB->addSuccessor(FalseMBB);
8707
8708 // FalseMBB:
8709 // # fallthrough to JoinMBB
8710 MBB = FalseMBB;
8711 MBB->addSuccessor(JoinMBB);
8712
8713 // JoinMBB:
8714 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
8715 // ...
8716 MBB = JoinMBB;
8717 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
8718 for (auto *SelMI : Selects)
8719 SelMI->eraseFromParent();
8720 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
8720
8722 for (auto *DbgMI : DbgValues)
8723 MBB->splice(InsertPos, StartMBB, DbgMI);
8724
8725 return JoinMBB;
8726}
8727
8728// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
8729// StoreOpcode is the store to use and Invert says whether the store should
8730// happen when the condition is false rather than true. If a STORE ON
8731// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
8732MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
8733 MachineBasicBlock *MBB,
8734 unsigned StoreOpcode,
8735 unsigned STOCOpcode,
8736 bool Invert) const {
8737 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8738
8739 Register SrcReg = MI.getOperand(0).getReg();
8740 MachineOperand Base = MI.getOperand(1);
8741 int64_t Disp = MI.getOperand(2).getImm();
8742 Register IndexReg = MI.getOperand(3).getReg();
8743 unsigned CCValid = MI.getOperand(4).getImm();
8744 unsigned CCMask = MI.getOperand(5).getImm();
8745 DebugLoc DL = MI.getDebugLoc();
8746
8747 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
8748
8749 // ISel pattern matching also adds a load memory operand of the same
8750 // address, so take special care to find the storing memory operand.
8751 MachineMemOperand *MMO = nullptr;
8752 for (auto *I : MI.memoperands())
8753 if (I->isStore()) {
8754 MMO = I;
8755 break;
8756 }
8757
8758 // Use STOCOpcode if possible. We could use different store patterns in
8759 // order to avoid matching the index register, but the performance trade-offs
8760 // might be more complicated in that case.
8761 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
8762 if (Invert)
8763 CCMask ^= CCValid;
8764
8765 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
8766 .addReg(SrcReg)
8767 .add(Base)
8768 .addImm(Disp)
8769 .addImm(CCValid)
8770 .addImm(CCMask)
8771 .addMemOperand(MMO);
8772
8773 MI.eraseFromParent();
8774 return MBB;
8775 }
8776
8777 // Get the condition needed to branch around the store.
8778 if (!Invert)
8779 CCMask ^= CCValid;
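// A concrete instance (illustrative): integer compares have
// CCValid == 0b1110 (CC 0-2 possible); storing on equality means
// CCMask == 0b1000, and XOR-ing with CCValid yields 0b0110, the mask of
// every other reachable CC value, i.e. the branch-around condition.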
8780
8781 MachineBasicBlock *StartMBB = MBB;
8782 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
8783 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8784
8785 // Unless CC was killed in the CondStore instruction, mark it as
8786 // live-in to both FalseMBB and JoinMBB.
8787 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
8788 !checkCCKill(MI, JoinMBB)) {
8789 FalseMBB->addLiveIn(SystemZ::CC);
8790 JoinMBB->addLiveIn(SystemZ::CC);
8791 }
8792
8793 // StartMBB:
8794 // BRC CCMask, JoinMBB
8795 // # fallthrough to FalseMBB
8796 MBB = StartMBB;
8797 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8798 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8799 MBB->addSuccessor(JoinMBB);
8800 MBB->addSuccessor(FalseMBB);
8801
8802 // FalseMBB:
8803 // store %SrcReg, %Disp(%Index,%Base)
8804 // # fallthrough to JoinMBB
8805 MBB = FalseMBB;
8806 BuildMI(MBB, DL, TII->get(StoreOpcode))
8807 .addReg(SrcReg)
8808 .add(Base)
8809 .addImm(Disp)
8810 .addReg(IndexReg)
8811 .addMemOperand(MMO);
8812 MBB->addSuccessor(JoinMBB);
8813
8814 MI.eraseFromParent();
8815 return JoinMBB;
8816}
8817
8818// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
8819 MachineBasicBlock *
8820SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
8821 MachineBasicBlock *MBB,
8822 bool Unsigned) const {
8823 MachineFunction &MF = *MBB->getParent();
8824 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8825 MachineRegisterInfo &MRI = MF.getRegInfo();
8826
8827 // Synthetic instruction to compare 128-bit values.
8828 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
8829 Register Op0 = MI.getOperand(0).getReg();
8830 Register Op1 = MI.getOperand(1).getReg();
8831
8832 MachineBasicBlock *StartMBB = MBB;
8833 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
8834 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
8835
8836 // StartMBB:
8837 //
8838 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
8839 // Swap the inputs to get:
8840 // CC 1 if high(Op0) > high(Op1)
8841 // CC 2 if high(Op0) < high(Op1)
8842 // CC 0 if high(Op0) == high(Op1)
8843 //
8844 // If CC != 0, we're done, so jump over the next instruction.
8845 //
8846 // VEC[L]G Op1, Op0
8847 // JNE JoinMBB
8848 // # fallthrough to HiEqMBB
8849 MBB = StartMBB;
8850 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
8851 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
8852 .addReg(Op1).addReg(Op0);
8853 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8854 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
8855 MBB->addSuccessor(JoinMBB);
8856 MBB->addSuccessor(HiEqMBB);
8857
8858 // HiEqMBB:
8859 //
8860 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
8861 // Since we already know the high parts are equal, the CC
8862 // result will only depend on the low parts:
8863 // CC 1 if low(Op0) > low(Op1)
8864 // CC 3 if low(Op0) <= low(Op1)
8865 //
8866 // VCHLGS Tmp, Op0, Op1
8867 // # fallthrough to JoinMBB
8868 MBB = HiEqMBB;
8869 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
8870 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
8871 .addReg(Op0).addReg(Op1);
8872 MBB->addSuccessor(JoinMBB);
8873
8874 // Mark CC as live-in to JoinMBB.
8875 JoinMBB->addLiveIn(SystemZ::CC);
8876
8877 MI.eraseFromParent();
8878 return JoinMBB;
8879}
8880
8881// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
8882// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
8883// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
8884// whether the field should be inverted after performing BinOpcode (e.g. for
8885// NAND).
8886MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
8887 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
8888 bool Invert) const {
8889 MachineFunction &MF = *MBB->getParent();
8890 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8891 MachineRegisterInfo &MRI = MF.getRegInfo();
8892
8893 // Extract the operands. Base can be a register or a frame index.
8894 // Src2 can be a register or immediate.
8895 Register Dest = MI.getOperand(0).getReg();
8896 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8897 int64_t Disp = MI.getOperand(2).getImm();
8898 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
8899 Register BitShift = MI.getOperand(4).getReg();
8900 Register NegBitShift = MI.getOperand(5).getReg();
8901 unsigned BitSize = MI.getOperand(6).getImm();
8902 DebugLoc DL = MI.getDebugLoc();
8903
8904 // Get the right opcodes for the displacement.
8905 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8906 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8907 assert(LOpcode && CSOpcode && "Displacement out of range");
8908
8909 // Create virtual registers for temporary results.
8910 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8911 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8912 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8913 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8914 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8915
8916 // Insert a basic block for the main loop.
8917 MachineBasicBlock *StartMBB = MBB;
8918 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8919 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8920
8921 // StartMBB:
8922 // ...
8923 // %OrigVal = L Disp(%Base)
8924 // # fall through to LoopMBB
8925 MBB = StartMBB;
8926 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8927 MBB->addSuccessor(LoopMBB);
8928
8929 // LoopMBB:
8930 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
8931 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8932 // %RotatedNewVal = OP %RotatedOldVal, %Src2
8933 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8934 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8935 // JNE LoopMBB
8936 // # fall through to DoneMBB
8937 MBB = LoopMBB;
8938 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8939 .addReg(OrigVal).addMBB(StartMBB)
8940 .addReg(Dest).addMBB(LoopMBB);
8941 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8942 .addReg(OldVal).addReg(BitShift).addImm(0);
8943 if (Invert) {
8944 // Perform the operation normally and then invert every bit of the field.
8945 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8946 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
8947 // XILF with the upper BitSize bits set.
8948 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
8949 .addReg(Tmp).addImm(-1U << (32 - BitSize));
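// For example (illustrative): with BitSize == 8 the immediate is
// -1U << 24 == 0xff000000, so XILF inverts only the byte that was rotated
// into the top of the word; the low 24 bits, which hold the neighboring
// parts of the containing word, pass through unchanged.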
8950 } else if (BinOpcode)
8951 // A simple binary operation.
8952 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
8953 .addReg(RotatedOldVal)
8954 .add(Src2);
8955 else
8956 // Use RISBG to rotate Src2 into position and use it to replace the
8957 // field in RotatedOldVal.
8958 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
8959 .addReg(RotatedOldVal).addReg(Src2.getReg())
8960 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
8961 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8962 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8963 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8964 .addReg(OldVal)
8965 .addReg(NewVal)
8966 .add(Base)
8967 .addImm(Disp);
8968 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8969 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
8970 MBB->addSuccessor(LoopMBB);
8971 MBB->addSuccessor(DoneMBB);
8972
8973 MI.eraseFromParent();
8974 return DoneMBB;
8975}
8976
8977// Implement EmitInstrWithCustomInserter for subword pseudo
8978// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
8979// instruction that should be used to compare the current field with the
8980// minimum or maximum value. KeepOldMask is the BRC condition-code mask
8981// for when the current field should be kept.
8982MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
8983 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
8984 unsigned KeepOldMask) const {
8985 MachineFunction &MF = *MBB->getParent();
8986 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8987 MachineRegisterInfo &MRI = MF.getRegInfo();
8988
8989 // Extract the operands. Base can be a register or a frame index.
8990 Register Dest = MI.getOperand(0).getReg();
8991 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8992 int64_t Disp = MI.getOperand(2).getImm();
8993 Register Src2 = MI.getOperand(3).getReg();
8994 Register BitShift = MI.getOperand(4).getReg();
8995 Register NegBitShift = MI.getOperand(5).getReg();
8996 unsigned BitSize = MI.getOperand(6).getImm();
8997 DebugLoc DL = MI.getDebugLoc();
8998
8999 // Get the right opcodes for the displacement.
9000 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
9001 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
9002 assert(LOpcode && CSOpcode && "Displacement out of range");
9003
9004 // Create virtual registers for temporary results.
9005 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9006 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9007 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9008 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9009 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9010 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9011
9012 // Insert 3 basic blocks for the loop.
9013 MachineBasicBlock *StartMBB = MBB;
9014 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9015 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9016 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
9017 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
9018
9019 // StartMBB:
9020 // ...
9021 // %OrigVal = L Disp(%Base)
9022 // # fall through to LoopMBB
9023 MBB = StartMBB;
9024 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
9025 MBB->addSuccessor(LoopMBB);
9026
9027 // LoopMBB:
9028 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
9029 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
9030 // CompareOpcode %RotatedOldVal, %Src2
9031 // BRC KeepOldMask, UpdateMBB
9032 MBB = LoopMBB;
9033 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
9034 .addReg(OrigVal).addMBB(StartMBB)
9035 .addReg(Dest).addMBB(UpdateMBB);
9036 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
9037 .addReg(OldVal).addReg(BitShift).addImm(0);
9038 BuildMI(MBB, DL, TII->get(CompareOpcode))
9039 .addReg(RotatedOldVal).addReg(Src2);
9040 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9041 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
9042 MBB->addSuccessor(UpdateMBB);
9043 MBB->addSuccessor(UseAltMBB);
9044
9045 // UseAltMBB:
9046 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
9047 // # fall through to UpdateMBB
9048 MBB = UseAltMBB;
9049 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
9050 .addReg(RotatedOldVal).addReg(Src2)
9051 .addImm(32).addImm(31 + BitSize).addImm(0);
9052 MBB->addSuccessor(UpdateMBB);
9053
9054 // UpdateMBB:
9055 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
9056 // [ %RotatedAltVal, UseAltMBB ]
9057 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
9058 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
9059 // JNE LoopMBB
9060 // # fall through to DoneMBB
9061 MBB = UpdateMBB;
9062 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
9063 .addReg(RotatedOldVal).addMBB(LoopMBB)
9064 .addReg(RotatedAltVal).addMBB(UseAltMBB);
9065 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
9066 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
9067 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
9068 .addReg(OldVal)
9069 .addReg(NewVal)
9070 .add(Base)
9071 .addImm(Disp);
9072 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9073 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
9074 MBB->addSuccessor(LoopMBB);
9075 MBB->addSuccessor(DoneMBB);
9076
9077 MI.eraseFromParent();
9078 return DoneMBB;
9079}
9080
9081// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
9082// instruction MI.
9083 MachineBasicBlock *
9084SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
9085 MachineBasicBlock *MBB) const {
9086 MachineFunction &MF = *MBB->getParent();
9087 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9088 MachineRegisterInfo &MRI = MF.getRegInfo();
9089
9090 // Extract the operands. Base can be a register or a frame index.
9091 Register Dest = MI.getOperand(0).getReg();
9092 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
9093 int64_t Disp = MI.getOperand(2).getImm();
9094 Register CmpVal = MI.getOperand(3).getReg();
9095 Register OrigSwapVal = MI.getOperand(4).getReg();
9096 Register BitShift = MI.getOperand(5).getReg();
9097 Register NegBitShift = MI.getOperand(6).getReg();
9098 int64_t BitSize = MI.getOperand(7).getImm();
9099 DebugLoc DL = MI.getDebugLoc();
9100
9101 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
9102
9103 // Get the right opcodes for the displacement and zero-extension.
9104 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
9105 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
9106 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
9107 assert(LOpcode && CSOpcode && "Displacement out of range");
9108
9109 // Create virtual registers for temporary results.
9110 Register OrigOldVal = MRI.createVirtualRegister(RC);
9111 Register OldVal = MRI.createVirtualRegister(RC);
9112 Register SwapVal = MRI.createVirtualRegister(RC);
9113 Register StoreVal = MRI.createVirtualRegister(RC);
9114 Register OldValRot = MRI.createVirtualRegister(RC);
9115 Register RetryOldVal = MRI.createVirtualRegister(RC);
9116 Register RetrySwapVal = MRI.createVirtualRegister(RC);
9117
9118 // Insert 2 basic blocks for the loop.
9119 MachineBasicBlock *StartMBB = MBB;
9120 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9121 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9122 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
9123
9124 // StartMBB:
9125 // ...
9126 // %OrigOldVal = L Disp(%Base)
9127 // # fall through to LoopMBB
9128 MBB = StartMBB;
9129 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
9130 .add(Base)
9131 .addImm(Disp)
9132 .addReg(0);
9133 MBB->addSuccessor(LoopMBB);
9134
9135 // LoopMBB:
9136 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
9137 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
9138 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
9139 // ^^ The low BitSize bits contain the field
9140 // of interest.
9141 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
9142 // ^^ Replace the upper 32-BitSize bits of the
9143 // swap value with those that we loaded and rotated.
9144 // %Dest = LL[CH] %OldValRot
9145 // CR %Dest, %CmpVal
9146 // JNE DoneMBB
9147 // # Fall through to SetMBB
9148 MBB = LoopMBB;
9149 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
9150 .addReg(OrigOldVal).addMBB(StartMBB)
9151 .addReg(RetryOldVal).addMBB(SetMBB);
9152 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
9153 .addReg(OrigSwapVal).addMBB(StartMBB)
9154 .addReg(RetrySwapVal).addMBB(SetMBB);
9155 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
9156 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
9157 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
9158 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
9159 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
9160 .addReg(OldValRot);
9161 BuildMI(MBB, DL, TII->get(SystemZ::CR))
9162 .addReg(Dest).addReg(CmpVal);
9163 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9164 .addImm(SystemZ::CCMASK_ICMP)
9165 .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
9166 MBB->addSuccessor(DoneMBB);
9167 MBB->addSuccessor(SetMBB);
9168
9169 // SetMBB:
9170 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
9171 // ^^ Rotate the new field to its proper position.
9172 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
9173 // JNE LoopMBB
9174 // # fall through to ExitMBB
9175 MBB = SetMBB;
9176 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
9177 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
9178 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
9179 .addReg(OldVal)
9180 .addReg(StoreVal)
9181 .add(Base)
9182 .addImm(Disp);
9183 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9184 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
9185 MBB->addSuccessor(LoopMBB);
9186 MBB->addSuccessor(DoneMBB);
9187
9188 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
9189 // to the block after the loop. At this point, CC may have been defined
9190 // either by the CR in LoopMBB or by the CS in SetMBB.
9191 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
9192 DoneMBB->addLiveIn(SystemZ::CC);
9193
9194 MI.eraseFromParent();
9195 return DoneMBB;
9196}
9197
9198// Emit a move from two GR64s to a GR128.
9199 MachineBasicBlock *
9200SystemZTargetLowering::emitPair128(MachineInstr &MI,
9201 MachineBasicBlock *MBB) const {
9202 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9203 const DebugLoc &DL = MI.getDebugLoc();
9204
9205 Register Dest = MI.getOperand(0).getReg();
9206 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
9207 .add(MI.getOperand(1))
9208 .addImm(SystemZ::subreg_h64)
9209 .add(MI.getOperand(2))
9210 .addImm(SystemZ::subreg_l64);
9211 MI.eraseFromParent();
9212 return MBB;
9213}
9214
9215// Emit an extension from a GR64 to a GR128. ClearEven is true
9216// if the high register of the GR128 value must be cleared or false if
9217// it's "don't care".
9218MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
9219 MachineBasicBlock *MBB,
9220 bool ClearEven) const {
9221 MachineFunction &MF = *MBB->getParent();
9222 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9223 MachineRegisterInfo &MRI = MF.getRegInfo();
9224 DebugLoc DL = MI.getDebugLoc();
9225
9226 Register Dest = MI.getOperand(0).getReg();
9227 Register Src = MI.getOperand(1).getReg();
9228 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
9229
9230 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
9231 if (ClearEven) {
9232 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
9233 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
9234
9235 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
9236 .addImm(0);
9237 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
9238 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
9239 In128 = NewIn128;
9240 }
9241 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
9242 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
9243
9244 MI.eraseFromParent();
9245 return MBB;
9246}
9247
9248 MachineBasicBlock *
9249SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
9250 MachineBasicBlock *MBB,
9251 unsigned Opcode, bool IsMemset) const {
9252 MachineFunction &MF = *MBB->getParent();
9253 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9254 MachineRegisterInfo &MRI = MF.getRegInfo();
9255 DebugLoc DL = MI.getDebugLoc();
9256
9257 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
9258 uint64_t DestDisp = MI.getOperand(1).getImm();
9259 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
9260 uint64_t SrcDisp;
9261
9262 // Fold the displacement Disp if it is out of range.
9263 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
9264 if (!isUInt<12>(Disp)) {
9265 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9266 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
9267 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
9268 .add(Base).addImm(Disp).addReg(0);
9269 Base = MachineOperand::CreateReg(Reg, false);
9270 Disp = 0;
9271 }
9272 };
9273
9274 if (!IsMemset) {
9275 SrcBase = earlyUseOperand(MI.getOperand(2));
9276 SrcDisp = MI.getOperand(3).getImm();
9277 } else {
9278 SrcBase = DestBase;
9279 SrcDisp = DestDisp++;
9280 foldDisplIfNeeded(DestBase, DestDisp);
9281 }
9282
9283 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
9284 bool IsImmForm = LengthMO.isImm();
9285 bool IsRegForm = !IsImmForm;
9286
9287 // Build and insert one Opcode of Length, with special treatment for memset.
9288 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
9289 MachineBasicBlock::iterator InsPos,
9290 MachineOperand DBase, uint64_t DDisp,
9291 MachineOperand SBase, uint64_t SDisp,
9292 unsigned Length) -> void {
9293 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
9294 if (IsMemset) {
9295 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
9296 if (ByteMO.isImm())
9297 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
9298 .add(SBase).addImm(SDisp).add(ByteMO);
9299 else
9300 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
9301 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
9302 if (--Length == 0)
9303 return;
9304 }
9305 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
9306 .add(DBase).addImm(DDisp).addImm(Length)
9307 .add(SBase).addImm(SDisp)
9308 .setMemRefs(MI.memoperands());
9309 };
9310
9311 bool NeedsLoop = false;
9312 uint64_t ImmLength = 0;
9313 Register LenAdjReg = SystemZ::NoRegister;
9314 if (IsImmForm) {
9315 ImmLength = LengthMO.getImm();
9316 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
9317 if (ImmLength == 0) {
9318 MI.eraseFromParent();
9319 return MBB;
9320 }
9321 if (Opcode == SystemZ::CLC) {
9322 if (ImmLength > 3 * 256)
9323 // A two-CLC sequence is a clear win over a loop, not least because
9324 // it needs only one branch. A three-CLC sequence needs the same
9325 // number of branches as a loop (i.e. 2), but is shorter. That
9326 // brings us to lengths greater than 768 bytes. It seems relatively
9327 // likely that a difference will be found within the first 768 bytes,
9328 // so we just optimize for the smallest number of branch
9329 // instructions, in order to avoid polluting the prediction buffer
9330 // too much.
9331 NeedsLoop = true;
9332 } else if (ImmLength > 6 * 256)
9333 // The heuristic we use is to prefer loops for anything that would
9334 // require 7 or more MVCs. With these kinds of sizes there isn't much
9335 // to choose between straight-line code and looping code, since the
9336 // time will be dominated by the MVCs themselves.
9337 NeedsLoop = true;
9338 } else {
9339 NeedsLoop = true;
9340 LenAdjReg = LengthMO.getReg();
9341 }
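// Worked numbers for the thresholds above (illustrative): a 1537-byte MVC
// copy needs ceil(1537/256) = 7 MVCs and therefore takes the loop, while
// 1536 bytes (exactly 6 MVCs) stays straight-line; for CLC the cutoff is
// 768 bytes, i.e. anything that would need more than three CLCs.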
9342
9343 // When generating more than one CLC, all but the last will need to
9344 // branch to the end when a difference is found.
9345 MachineBasicBlock *EndMBB =
9346 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
9347 ? SystemZ::splitBlockAfter(MI, MBB)
9348 : nullptr);
9349
9350 if (NeedsLoop) {
9351 Register StartCountReg =
9352 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
9353 if (IsImmForm) {
9354 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
9355 ImmLength &= 255;
9356 } else {
9357 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
9358 .addReg(LenAdjReg)
9359 .addReg(0)
9360 .addImm(8);
9361 }
9362
9363 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
9364 auto loadZeroAddress = [&]() -> MachineOperand {
9365 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9366 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
9367 return MachineOperand::CreateReg(Reg, false);
9368 };
9369 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
9370 DestBase = loadZeroAddress();
9371 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
9372 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
9373
9374 MachineBasicBlock *StartMBB = nullptr;
9375 MachineBasicBlock *LoopMBB = nullptr;
9376 MachineBasicBlock *NextMBB = nullptr;
9377 MachineBasicBlock *DoneMBB = nullptr;
9378 MachineBasicBlock *AllDoneMBB = nullptr;
9379
9380 Register StartSrcReg = forceReg(MI, SrcBase, TII);
9381 Register StartDestReg =
9382 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
9383
9384 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
9385 Register ThisSrcReg = MRI.createVirtualRegister(RC);
9386 Register ThisDestReg =
9387 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
9388 Register NextSrcReg = MRI.createVirtualRegister(RC);
9389 Register NextDestReg =
9390 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
9391 RC = &SystemZ::GR64BitRegClass;
9392 Register ThisCountReg = MRI.createVirtualRegister(RC);
9393 Register NextCountReg = MRI.createVirtualRegister(RC);
9394
9395 if (IsRegForm) {
9396 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9397 StartMBB = SystemZ::emitBlockAfter(MBB);
9398 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9399 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9400 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
9401
9402 // MBB:
9403 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
9404 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9405 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
9406 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9407 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
9408 .addMBB(AllDoneMBB);
9409 MBB->addSuccessor(AllDoneMBB);
9410 if (!IsMemset)
9411 MBB->addSuccessor(StartMBB);
9412 else {
9413 // MemsetOneCheckMBB:
9414 // # Jump to MemsetOneMBB for a memset of length 1, or
9415 // # fall thru to StartMBB.
9416 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
9417 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
9418 MBB->addSuccessor(MemsetOneCheckMBB);
9419 MBB = MemsetOneCheckMBB;
9420 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9421 .addReg(LenAdjReg).addImm(-1);
9422 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9423 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
9424 .addMBB(MemsetOneMBB);
9425 MBB->addSuccessor(MemsetOneMBB, {10, 100});
9426 MBB->addSuccessor(StartMBB, {90, 100});
9427
9428 // MemsetOneMBB:
9429 // # Jump back to AllDoneMBB after a single MVI or STC.
9430 MBB = MemsetOneMBB;
9431 insertMemMemOp(MBB, MBB->end(),
9432 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
9433 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
9434 1);
9435 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
9436 MBB->addSuccessor(AllDoneMBB);
9437 }
9438
9439 // StartMBB:
9440 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
9441 MBB = StartMBB;
9442 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9443 .addReg(StartCountReg).addImm(0);
9444 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9445 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
9446 .addMBB(DoneMBB);
9447 MBB->addSuccessor(DoneMBB);
9448 MBB->addSuccessor(LoopMBB);
9449 }
9450 else {
9451 StartMBB = MBB;
9452 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9453 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9454 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9455
9456 // StartMBB:
9457 // # fall through to LoopMBB
9458 MBB->addSuccessor(LoopMBB);
9459
9460 DestBase = MachineOperand::CreateReg(NextDestReg, false);
9461 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
9462 if (EndMBB && !ImmLength)
9463 // If the loop handled the whole CLC range, DoneMBB will be empty with
9464 // CC live-through into EndMBB, so add it as live-in.
9465 DoneMBB->addLiveIn(SystemZ::CC);
9466 }
9467
9468 // LoopMBB:
9469 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
9470 // [ %NextDestReg, NextMBB ]
9471 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
9472 // [ %NextSrcReg, NextMBB ]
9473 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
9474 // [ %NextCountReg, NextMBB ]
9475 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
9476 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
9477 // ( JLH EndMBB )
9478 //
9479 // The prefetch is used only for MVC. The JLH is used only for CLC.
9480 MBB = LoopMBB;
9481 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
9482 .addReg(StartDestReg).addMBB(StartMBB)
9483 .addReg(NextDestReg).addMBB(NextMBB);
9484 if (!HaveSingleBase)
9485 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
9486 .addReg(StartSrcReg).addMBB(StartMBB)
9487 .addReg(NextSrcReg).addMBB(NextMBB);
9488 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
9489 .addReg(StartCountReg).addMBB(StartMBB)
9490 .addReg(NextCountReg).addMBB(NextMBB);
9491 if (Opcode == SystemZ::MVC)
9492 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
9493 .addImm(SystemZ::PFD_WRITE)
9494 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
9495 insertMemMemOp(MBB, MBB->end(),
9496 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
9497 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
9498 if (EndMBB) {
9499 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9500 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
9501 .addMBB(EndMBB);
9502 MBB->addSuccessor(EndMBB);
9503 MBB->addSuccessor(NextMBB);
9504 }
9505
9506 // NextMBB:
9507 // %NextDestReg = LA 256(%ThisDestReg)
9508 // %NextSrcReg = LA 256(%ThisSrcReg)
9509 // %NextCountReg = AGHI %ThisCountReg, -1
9510 // CGHI %NextCountReg, 0
9511 // JLH LoopMBB
9512 // # fall through to DoneMBB
9513 //
9514 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
9515 MBB = NextMBB;
9516 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
9517 .addReg(ThisDestReg).addImm(256).addReg(0);
9518 if (!HaveSingleBase)
9519 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
9520 .addReg(ThisSrcReg).addImm(256).addReg(0);
9521 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
9522 .addReg(ThisCountReg).addImm(-1);
9523 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9524 .addReg(NextCountReg).addImm(0);
9525 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9526 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
9527 .addMBB(LoopMBB);
9528 MBB->addSuccessor(LoopMBB);
9529 MBB->addSuccessor(DoneMBB);
9530
9531 MBB = DoneMBB;
9532 if (IsRegForm) {
9533 // DoneMBB:
9534 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
9535 // # Use EXecute Relative Long for the remainder of the bytes. The target
9536 // instruction of the EXRL will have a length field of 1 since 0 is an
9537 // illegal value. The number of bytes processed becomes (%LenAdjReg &
9538 // 0xff) + 1.
9539 // # Fall through to AllDoneMBB.
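// (Illustrative: for a 768-byte reg-form copy %LenAdjReg holds 0x2ff; the
// loop above moves two 256-byte blocks and the EXRL then executes the
// target instruction with length field 0xff, processing
// (0x2ff & 0xff) + 1 = 256 remaining bytes.)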
9540 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9541 Register RemDestReg = HaveSingleBase ? RemSrcReg
9542 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9543 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
9544 .addReg(StartDestReg).addMBB(StartMBB)
9545 .addReg(NextDestReg).addMBB(NextMBB);
9546 if (!HaveSingleBase)
9547 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
9548 .addReg(StartSrcReg).addMBB(StartMBB)
9549 .addReg(NextSrcReg).addMBB(NextMBB);
9550 if (IsMemset)
9551 insertMemMemOp(MBB, MBB->end(),
9552 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
9553 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
9554 MachineInstrBuilder EXRL_MIB =
9555 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
9556 .addImm(Opcode)
9557 .addReg(LenAdjReg)
9558 .addReg(RemDestReg).addImm(DestDisp)
9559 .addReg(RemSrcReg).addImm(SrcDisp);
9560 MBB->addSuccessor(AllDoneMBB);
9561 MBB = AllDoneMBB;
9562 if (Opcode != SystemZ::MVC) {
9563 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
9564 if (EndMBB)
9565 MBB->addLiveIn(SystemZ::CC);
9566 }
9567 }
9568 MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
9569 }
9570
9571 // Handle any remaining bytes with straight-line code.
9572 while (ImmLength > 0) {
9573 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
9574 // The previous iteration might have created out-of-range displacements.
9575 // Apply them using LA/LAY if so.
9576 foldDisplIfNeeded(DestBase, DestDisp);
9577 foldDisplIfNeeded(SrcBase, SrcDisp);
9578 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
9579 DestDisp += ThisLength;
9580 SrcDisp += ThisLength;
9581 ImmLength -= ThisLength;
9582 // If there's another CLC to go, branch to the end if a difference
9583 // was found.
9584 if (EndMBB && ImmLength > 0) {
9586 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9588 .addMBB(EndMBB);
9589 MBB->addSuccessor(EndMBB);
9590 MBB->addSuccessor(NextMBB);
9591 MBB = NextMBB;
9592 }
9593 }
9594 if (EndMBB) {
9595 MBB->addSuccessor(EndMBB);
9596 MBB = EndMBB;
9597 MBB->addLiveIn(SystemZ::CC);
9598 }
9599
9600 MI.eraseFromParent();
9601 return MBB;
9602}
9603
9604// Decompose string pseudo-instruction MI into a loop that continually performs
9605// Opcode until CC != 3.
9606MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
9607 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9608 MachineFunction &MF = *MBB->getParent();
9609 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9611 DebugLoc DL = MI.getDebugLoc();
9612
9613 uint64_t End1Reg = MI.getOperand(0).getReg();
9614 uint64_t Start1Reg = MI.getOperand(1).getReg();
9615 uint64_t Start2Reg = MI.getOperand(2).getReg();
9616 uint64_t CharReg = MI.getOperand(3).getReg();
9617
9618 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
9619 uint64_t This1Reg = MRI.createVirtualRegister(RC);
9620 uint64_t This2Reg = MRI.createVirtualRegister(RC);
9621 uint64_t End2Reg = MRI.createVirtualRegister(RC);
9622
9623 MachineBasicBlock *StartMBB = MBB;
9624 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9625 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9626
9627 // StartMBB:
9628 // # fall through to LoopMBB
9629 MBB->addSuccessor(LoopMBB);
9630
9631 // LoopMBB:
9632 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
9633 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
9634 // R0L = %CharReg
9635 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
9636 // JO LoopMBB
9637 // # fall through to DoneMBB
9638 //
9639 // The load of R0L can be hoisted by post-RA LICM.
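// (Background, hedged: CLST compares until it finds an inequality or the
// terminator byte held in R0L, but may also stop early after a
// CPU-determined number of bytes, in which case it sets CC 3; the loop
// therefore simply retries while CC == 3.)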
9640 MBB = LoopMBB;
9641
9642 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
9643 .addReg(Start1Reg).addMBB(StartMBB)
9644 .addReg(End1Reg).addMBB(LoopMBB);
9645 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
9646 .addReg(Start2Reg).addMBB(StartMBB)
9647 .addReg(End2Reg).addMBB(LoopMBB);
9648 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
9649 BuildMI(MBB, DL, TII->get(Opcode))
9650 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
9651 .addReg(This1Reg).addReg(This2Reg);
9652 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9653 .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
9654 MBB->addSuccessor(LoopMBB);
9655 MBB->addSuccessor(DoneMBB);
9656
9657 DoneMBB->addLiveIn(SystemZ::CC);
9658
9659 MI.eraseFromParent();
9660 return DoneMBB;
9661}
9662
9663// Update TBEGIN instruction with final opcode and register clobbers.
9664MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
9665 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
9666 bool NoFloat) const {
9667 MachineFunction &MF = *MBB->getParent();
9668 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9669 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9670
9671 // Update opcode.
9672 MI.setDesc(TII->get(Opcode));
9673
9674 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
9675 // Make sure to add the corresponding GRSM bits if they are missing.
9676 uint64_t Control = MI.getOperand(2).getImm();
9677 static const unsigned GPRControlBit[16] = {
9678 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
9679 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
9680 };
9681 Control |= GPRControlBit[15];
9682 if (TFI->hasFP(MF))
9683 Control |= GPRControlBit[11];
9684 MI.getOperand(2).setImm(Control);
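// Example of the fix-up above (illustrative): GPRControlBit[15] is 0x0100,
// the GRSM bit covering the r14/r15 pair, so the stack pointer is always
// restored on abort; when a frame pointer is used, GPRControlBit[11] ==
// 0x0400 additionally covers the r10/r11 pair.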
9685
9686 // Add GPR clobbers.
9687 for (int I = 0; I < 16; I++) {
9688 if ((Control & GPRControlBit[I]) == 0) {
9689 unsigned Reg = SystemZMC::GR64Regs[I];
9690 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9691 }
9692 }
9693
9694 // Add FPR/VR clobbers.
9695 if (!NoFloat && (Control & 4) != 0) {
9696 if (Subtarget.hasVector()) {
9697 for (unsigned Reg : SystemZMC::VR128Regs) {
9698 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9699 }
9700 } else {
9701 for (unsigned Reg : SystemZMC::FP64Regs) {
9702 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9703 }
9704 }
9705 }
9706
9707 return MBB;
9708}
9709
9710MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
9711 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9712 MachineFunction &MF = *MBB->getParent();
9713 MachineRegisterInfo *MRI = &MF.getRegInfo();
9714 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9715 DebugLoc DL = MI.getDebugLoc();
9716
9717 Register SrcReg = MI.getOperand(0).getReg();
9718
9719 // Create new virtual register of the same class as source.
9720 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
9721 Register DstReg = MRI->createVirtualRegister(RC);
9722
9723 // Replace pseudo with a normal load-and-test that models the def as
9724 // well.
9725 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
9726 .addReg(SrcReg)
9727 .setMIFlags(MI.getFlags());
9728 MI.eraseFromParent();
9729
9730 return MBB;
9731}
9732
9733MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
9734 MachineInstr &MI, MachineBasicBlock *MBB) const {
9735 MachineFunction &MF = *MBB->getParent();
9736 MachineRegisterInfo *MRI = &MF.getRegInfo();
9737 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9738 DebugLoc DL = MI.getDebugLoc();
9739 const unsigned ProbeSize = getStackProbeSize(MF);
9740 Register DstReg = MI.getOperand(0).getReg();
9741 Register SizeReg = MI.getOperand(2).getReg();
9742
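// Illustrative expansion (hypothetical numbers, ProbeSize == 4096): for an
// alloca of 10000 bytes the loop below runs twice, each time lowering R15
// by 4096 and touching the new page with a volatile compare; the remaining
// 1808 bytes are then allocated in TailMBB and probed once more before
// DstReg receives the final R15.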
9743 MachineBasicBlock *StartMBB = MBB;
9744 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9745 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
9746 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
9747 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
9748 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
9749
9752
9753 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9754 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9755
9756 // LoopTestMBB
9757 // BRC TailTestMBB
9758 // # fallthrough to LoopBodyMBB
9759 StartMBB->addSuccessor(LoopTestMBB);
9760 MBB = LoopTestMBB;
9761 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
9762 .addReg(SizeReg)
9763 .addMBB(StartMBB)
9764 .addReg(IncReg)
9765 .addMBB(LoopBodyMBB);
9766 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
9767 .addReg(PHIReg)
9768 .addImm(ProbeSize);
9769 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9770 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
9771 .addMBB(TailTestMBB);
9772 MBB->addSuccessor(LoopBodyMBB);
9773 MBB->addSuccessor(TailTestMBB);
9774
9775 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
9776 // J LoopTestMBB
9777 MBB = LoopBodyMBB;
9778 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
9779 .addReg(PHIReg)
9780 .addImm(ProbeSize);
9781 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
9782 .addReg(SystemZ::R15D)
9783 .addImm(ProbeSize);
9784 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9785 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
9786 .setMemRefs(VolLdMMO);
9787 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
9788 MBB->addSuccessor(LoopTestMBB);
9789
9790 // TailTestMBB
9791 // BRC DoneMBB
9792 // # fallthrough to TailMBB
9793 MBB = TailTestMBB;
9794 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9795 .addReg(PHIReg)
9796 .addImm(0);
9797 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9798 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
9799 .addMBB(DoneMBB);
9800 MBB->addSuccessor(TailMBB);
9801 MBB->addSuccessor(DoneMBB);
9802
9803 // TailMBB
9804 // # fallthrough to DoneMBB
9805 MBB = TailMBB;
9806 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
9807 .addReg(SystemZ::R15D)
9808 .addReg(PHIReg);
9809 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9810 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
9811 .setMemRefs(VolLdMMO);
9812 MBB->addSuccessor(DoneMBB);
9813
9814 // DoneMBB
9815 MBB = DoneMBB;
9816 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
9817 .addReg(SystemZ::R15D);
9818
9819 MI.eraseFromParent();
9820 return DoneMBB;
9821}
9822
9823SDValue SystemZTargetLowering::
9824getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
9825 MachineFunction &MF = DAG.getMachineFunction();
9826 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
9827 SDLoc DL(SP);
9828 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
9829 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
9830}
9831
9832 MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
9833 MachineInstr &MI, MachineBasicBlock *MBB) const {
9834 switch (MI.getOpcode()) {
9835 case SystemZ::ADJCALLSTACKDOWN:
9836 case SystemZ::ADJCALLSTACKUP:
9837 return emitAdjCallStack(MI, MBB);
9838
9839 case SystemZ::Select32:
9840 case SystemZ::Select64:
9841 case SystemZ::Select128:
9842 case SystemZ::SelectF32:
9843 case SystemZ::SelectF64:
9844 case SystemZ::SelectF128:
9845 case SystemZ::SelectVR32:
9846 case SystemZ::SelectVR64:
9847 case SystemZ::SelectVR128:
9848 return emitSelect(MI, MBB);
9849
9850 case SystemZ::CondStore8Mux:
9851 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
9852 case SystemZ::CondStore8MuxInv:
9853 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
9854 case SystemZ::CondStore16Mux:
9855 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
9856 case SystemZ::CondStore16MuxInv:
9857 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
9858 case SystemZ::CondStore32Mux:
9859 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
9860 case SystemZ::CondStore32MuxInv:
9861 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
9862 case SystemZ::CondStore8:
9863 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
9864 case SystemZ::CondStore8Inv:
9865 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
9866 case SystemZ::CondStore16:
9867 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
9868 case SystemZ::CondStore16Inv:
9869 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
9870 case SystemZ::CondStore32:
9871 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
9872 case SystemZ::CondStore32Inv:
9873 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
9874 case SystemZ::CondStore64:
9875 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
9876 case SystemZ::CondStore64Inv:
9877 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
9878 case SystemZ::CondStoreF32:
9879 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
9880 case SystemZ::CondStoreF32Inv:
9881 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
9882 case SystemZ::CondStoreF64:
9883 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
9884 case SystemZ::CondStoreF64Inv:
9885 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
9886
9887 case SystemZ::SCmp128Hi:
9888 return emitICmp128Hi(MI, MBB, false);
9889 case SystemZ::UCmp128Hi:
9890 return emitICmp128Hi(MI, MBB, true);
9891
9892 case SystemZ::PAIR128:
9893 return emitPair128(MI, MBB);
9894 case SystemZ::AEXT128:
9895 return emitExt128(MI, MBB, false);
9896 case SystemZ::ZEXT128:
9897 return emitExt128(MI, MBB, true);
9898
9899 case SystemZ::ATOMIC_SWAPW:
9900 return emitAtomicLoadBinary(MI, MBB, 0);
9901
9902 case SystemZ::ATOMIC_LOADW_AR:
9903 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
9904 case SystemZ::ATOMIC_LOADW_AFI:
9905 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
9906
9907 case SystemZ::ATOMIC_LOADW_SR:
9908 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
9909
9910 case SystemZ::ATOMIC_LOADW_NR:
9911 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
9912 case SystemZ::ATOMIC_LOADW_NILH:
9913 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
9914
9915 case SystemZ::ATOMIC_LOADW_OR:
9916 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
9917 case SystemZ::ATOMIC_LOADW_OILH:
9918 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
9919
9920 case SystemZ::ATOMIC_LOADW_XR:
9921 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
9922 case SystemZ::ATOMIC_LOADW_XILF:
9923 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
9924
9925 case SystemZ::ATOMIC_LOADW_NRi:
9926 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
9927 case SystemZ::ATOMIC_LOADW_NILHi:
9928 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
9929
9930 case SystemZ::ATOMIC_LOADW_MIN:
9931 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
9932 case SystemZ::ATOMIC_LOADW_MAX:
9933 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
9934 case SystemZ::ATOMIC_LOADW_UMIN:
9935 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
9936 case SystemZ::ATOMIC_LOADW_UMAX:
9937 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
9938
9939 case SystemZ::ATOMIC_CMP_SWAPW:
9940 return emitAtomicCmpSwapW(MI, MBB);
9941 case SystemZ::MVCImm:
9942 case SystemZ::MVCReg:
9943 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
9944 case SystemZ::NCImm:
9945 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
9946 case SystemZ::OCImm:
9947 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
9948 case SystemZ::XCImm:
9949 case SystemZ::XCReg:
9950 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
9951 case SystemZ::CLCImm:
9952 case SystemZ::CLCReg:
9953 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
9954 case SystemZ::MemsetImmImm:
9955 case SystemZ::MemsetImmReg:
9956 case SystemZ::MemsetRegImm:
9957 case SystemZ::MemsetRegReg:
9958 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
9959 case SystemZ::CLSTLoop:
9960 return emitStringWrapper(MI, MBB, SystemZ::CLST);
9961 case SystemZ::MVSTLoop:
9962 return emitStringWrapper(MI, MBB, SystemZ::MVST);
9963 case SystemZ::SRSTLoop:
9964 return emitStringWrapper(MI, MBB, SystemZ::SRST);
9965 case SystemZ::TBEGIN:
9966 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
9967 case SystemZ::TBEGIN_nofloat:
9968 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
9969 case SystemZ::TBEGINC:
9970 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
9971 case SystemZ::LTEBRCompare_Pseudo:
9972 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
9973 case SystemZ::LTDBRCompare_Pseudo:
9974 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
9975 case SystemZ::LTXBRCompare_Pseudo:
9976 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
9977
9978 case SystemZ::PROBED_ALLOCA:
9979 return emitProbedAlloca(MI, MBB);
9980 case SystemZ::EH_SjLj_SetJmp:
9981 return emitEHSjLjSetJmp(MI, MBB);
9982 case SystemZ::EH_SjLj_LongJmp:
9983 return emitEHSjLjLongJmp(MI, MBB);
9984
9985 case TargetOpcode::STACKMAP:
9986 case TargetOpcode::PATCHPOINT:
9987 return emitPatchPoint(MI, MBB);
9988
9989 default:
9990 llvm_unreachable("Unexpected instr type to insert");
9991 }
9992}
9993
9994// This is only used by the isel schedulers, and is needed only to prevent
9995// the compiler from crashing when list-ilp is used.
9996const TargetRegisterClass *
9997SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
9998 if (VT == MVT::Untyped)
9999 return &SystemZ::ADDR128BitRegClass;
10000 return TargetLowering::getRepRegClassFor(VT);
10001}
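// MVT::Untyped is how the even/odd 64-bit register pairs produced by pseudos
// such as the 128-bit multiply/divide and compare-and-swap appear to the
// scheduler, so the 128-bit pair class is a natural representative for it.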
10002
10003SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
10004 SelectionDAG &DAG) const {
10005 SDLoc dl(Op);
10006 /*
10007 The rounding method is in FPC Byte 3 bits 6-7, and has the following
10008 settings:
10009 00 Round to nearest
10010 01 Round to 0
10011 10 Round to +inf
10012 11 Round to -inf
10013
10014 FLT_ROUNDS, on the other hand, expects the following:
10015 -1 Undefined
10016 0 Round to 0
10017 1 Round to nearest
10018 2 Round to +inf
10019 3 Round to -inf
10020 */
10021
10022 // Save FPC to register.
10023 SDValue Chain = Op.getOperand(0);
10024 SDValue EFPC(
10025 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
10026 Chain = EFPC.getValue(1);
10027
10028 // Transform the FPC rounding-mode bits into the FLT_ROUNDS encoding.
10029 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
10030 DAG.getConstant(3, dl, MVT::i32));
10031 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
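// Checking the formula against each FPC rounding value:
// 00 -> (0^0)^1 = 1 (nearest) 01 -> (1^0)^1 = 0 (to zero)
// 10 -> (2^1)^1 = 2 (+inf) 11 -> (3^1)^1 = 3 (-inf)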
10032 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
10033 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
10034 DAG.getConstant(1, dl, MVT::i32)));
10035
10036 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
10037 DAG.getConstant(1, dl, MVT::i32));
10038 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
10039
10040 return DAG.getMergeValues({RetVal, Chain}, dl);
10041}
10042
10043SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
10044 SelectionDAG &DAG) const {
10045 EVT VT = Op.getValueType();
10046 Op = Op.getOperand(0);
10047 EVT OpVT = Op.getValueType();
10048
10049 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
10050
10051 SDLoc DL(Op);
10052
10053 // Load a zero vector for the third operand of VSUM.
10054 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
10055
10056 // Execute VSUM.
10057 switch (OpVT.getScalarSizeInBits()) {
10058 case 8:
10059 case 16:
10060 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
10061 [[fallthrough]];
10062 case 32:
10063 case 64:
10064 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
10065 DAG.getBitcast(Op.getValueType(), Zero));
10066 break;
10067 case 128:
10068 break; // VSUM over v1i128 should not happen and would be a noop
10069 default:
10070 llvm_unreachable("Unexpected scalar size.");
10071 }
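// For example, a v8i16 operand is first summed into four 32-bit partial
// sums (v4i32), which the second VSUM then adds into one 128-bit total.
// Bitcast back to the original vector type below, that total's low-order
// bits land in the highest-indexed element on this big-endian target.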
10072 // Cast to original vector type, retrieve last element.
10073 return DAG.getNode(
10074 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
10075 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
10076}
10077
10078// Only consider a function fully internal if it has local linkage and is
10079// not used in any other way than acting as the called function at
10080// call sites.
10081bool SystemZTargetLowering::isFullyInternal(const Function *Fn) const {
10082 if (!Fn->hasLocalLinkage())
10083 return false;
10084 for (const User *U : Fn->users()) {
10085 if (auto *CB = dyn_cast<CallBase>(U)) {
10086 if (CB->getCalledFunction() != Fn)
10087 return false;
10088 } else
10089 return false;
10090 }
10091 return true;
10092}
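// For example, a 'define internal i16 @f(i16 %x)' whose only uses are direct
// 'call i16 @f(...)' sites is fully internal; taking the function's address,
// or passing it as a call argument rather than calling it, makes some user
// fail the CalledFunction check above and disqualifies it.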
10093
10094static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS) {
10095 FunctionType *FT = F->getFunctionType();
10096 const AttributeList &Attrs = F->getAttributes();
10097 if (Attrs.hasRetAttrs())
10098 OS << Attrs.getAsString(AttributeList::ReturnIndex) << " ";
10099 OS << *F->getReturnType() << " @" << F->getName() << "(";
10100 for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
10101 if (I)
10102 OS << ", ";
10103 OS << *FT->getParamType(I);
10104 AttributeSet ArgAttrs = Attrs.getParamAttrs(I);
10105 for (auto A : {Attribute::SExt, Attribute::ZExt, Attribute::NoExt})
10106 if (ArgAttrs.hasAttribute(A))
10107 OS << " " << Attribute::getNameFromAttrKind(A);
10108 }
10109 OS << ")\n";
10110}
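// For a function declared as 'define signext i16 @f(i16 signext %x, i64 %y)',
// this prints: signext i16 @f(i16 signext, i64)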
10111
10112void SystemZTargetLowering::
10113verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs,
10114 const Function *F, SDValue Callee) const {
10115 bool IsInternal = false;
10116 const Function *CalleeFn = nullptr;
10117 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
10118 if ((CalleeFn = dyn_cast<Function>(G->getGlobal())))
10119 IsInternal = isFullyInternal(CalleeFn);
10120 if (!verifyNarrowIntegerArgs(Outs, IsInternal)) {
10121 errs() << "ERROR: Missing extension attribute of passed "
10122 << "value in call to function:\n" << "Callee: ";
10123 if (CalleeFn != nullptr)
10124 printFunctionArgExts(CalleeFn, errs());
10125 else
10126 errs() << "-\n";
10127 errs() << "Caller: ";
10128 printFunctionArgExts(F, errs());
10129 llvm_unreachable("");
10130 }
10131}
10132
10133void SystemZTargetLowering::
10134verifyNarrowIntegerArgs_Ret(const SmallVectorImpl<ISD::OutputArg> &Outs,
10135 const Function *F) const {
10136 if (!verifyNarrowIntegerArgs(Outs, isFullyInternal(F))) {
10137 errs() << "ERROR: Missing extension attribute of returned "
10138 << "value from function:\n";
10139 printFunctionArgExts(F, errs());
10140 llvm_unreachable("");
10141 }
10142}
10143
10144// Verify that narrow integer arguments are extended as required by the ABI.
10145// Return false if an error is found.
10146bool SystemZTargetLowering::
10147verifyNarrowIntegerArgs(const SmallVectorImpl<ISD::OutputArg> &Outs,
10148 bool IsInternal) const {
10149 if (IsInternal || !Subtarget.isTargetELF())
10150 return true;
10151
10152 // Temporarily only do the check when explicitly requested, until it can be
10153 // enabled by default.
10154 if (!EnableIntArgExtCheck)
10155 return true;
10156
10157 if (EnableIntArgExtCheck.getNumOccurrences()) {
10158 if (!EnableIntArgExtCheck)
10159 return true;
10160 } else if (!getTargetMachine().Options.VerifyArgABICompliance)
10161 return true;
10162
10163 for (unsigned i = 0; i < Outs.size(); ++i) {
10164 MVT VT = Outs[i].VT;
10165 ISD::ArgFlagsTy Flags = Outs[i].Flags;
10166 if (VT.isInteger()) {
10167 assert((VT == MVT::i32 || VT.getSizeInBits() >= 64) &&
10168 "Unexpected integer argument VT.");
10169 if (VT == MVT::i32 &&
10170 !Flags.isSExt() && !Flags.isZExt() && !Flags.isNoExt())
10171 return false;
10172 }
10173 }
10174
10175 return true;
10176}
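// As a usage sketch (assuming an s390x ELF triple), the check above can be
// exercised with:
// llc -mtriple=s390x-linux-gnu -argext-abi-check test.ll
// where test.ll passes or returns a 32-bit-or-narrower integer without a
// signext/zeroext/noext attribute; the verifiers then print the offending
// caller/callee signatures and abort via llvm_unreachable.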
unsigned const MachineRegisterInfo * MRI
#define Success
unsigned Intr
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
uint64_t Addr
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
IRTranslator LLVM IR MI
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
uint64_t High
uint64_t IntrinsicInst * II
#define P(N)
static constexpr Register SPReg
static bool isSelectPseudo(MachineInstr &MI)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static cl::opt< bool > EnableIntArgExtCheck("argext-abi-check", cl::init(false), cl::desc("Verify that narrow int args are properly extended per the " "SystemZ ABI."))
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
#define OPCODE(NAME)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:234
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1520
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1386
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1492
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:910
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1330
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:258
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1468
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition: APInt.h:322
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:370
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:858
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:851
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
@ Add
*p = old + v
Definition: Instructions.h:720
@ Or
*p = old | v
Definition: Instructions.h:728
@ Sub
*p = old - v
Definition: Instructions.h:722
@ And
*p = old & v
Definition: Instructions.h:724
@ Xor
*p = old ^ v
Definition: Instructions.h:730
BinOp getOperation() const
Definition: Instructions.h:805
bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:392
static StringRef getNameFromAttrKind(Attribute::AttrKind AttrKind)
Definition: Attributes.cpp:314
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
The address of a basic block.
Definition: Constants.h:893
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
iterator end()
Definition: DenseMap.h:84
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:766
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:778
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:595
bool hasLocalLinkage() const
Definition: GlobalValue.h:528
bool hasPrivateLinkage() const
Definition: GlobalValue.h:527
bool hasInternalLinkage() const
Definition: GlobalValue.h:526
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setMaxCallFrameSize(uint64_t S)
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setReturnAddressIsTaken(bool s)
MachineFunctionProperties & reset(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getMachineOpcode() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:748
SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:799
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:501
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:758
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:854
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:825
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:495
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:710
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:888
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:496
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:698
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:794
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:490
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:871
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:508
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:765
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:578
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void reserve(size_type N)
Definition: SmallVector.h:663
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:470
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:684
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
iterator end() const
Definition: StringRef.h:118
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
A SystemZ-specific class detailing special use registers particular for calling conventions.
A SystemZ-specific constant pool value.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const
const TargetFrameLowering * getFrameLowering() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
const SystemZRegisterInfo * getRegisterInfo() const override
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
bool useSoftFloat() const override
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers Particular to z/OS when in 64 bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g. {edx}), return the register number and the register class for the register.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
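A typical override of the first of these hooks, sketched for a hypothetical target (only the 'r' constraint is classified here; everything else defers to the base class):

  TargetLowering::ConstraintType
  MyTargetLowering::getConstraintType(StringRef Constraint) const {
    if (Constraint.size() == 1 && Constraint[0] == 'r')
      return C_RegisterClass; // plain general-register constraint
    return TargetLowering::getConstraintType(Constraint);
  }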
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
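These TargetMachine queries commonly steer TLS lowering. A sketch (Node is assumed to be the GlobalAddressSDNode being lowered):

  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(Node, DAG);
  TLSModel::Model Model = DAG.getTarget().getTLSModel(Node->getGlobal());
  // Model is GeneralDynamic, LocalDynamic, InitialExec or LocalExec.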
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:228
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
iterator_range< user_iterator > users()
Definition: Value.h:421
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
self_iterator getIterator()
Definition: ilist_node.h:132
A raw_ostream that writes to a file descriptor.
Definition: raw_ostream.h:460
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual results.
Definition: ISDOpcodes.h:243
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:753
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:153
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2*N], and return the full value as two results, each of type iN.
Definition: ISDOpcodes.h:257
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1340
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1342
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1312
@ STRICT_FCEIL
Definition: ISDOpcodes.h:441
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1343
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store instruction, then an offset node.
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with side effects that does not return a value.
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
Definition: ISDOpcodes.h:1325
@ STRICT_FMINIMUM
Definition: ISDOpcodes.h:451
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
Definition: ISDOpcodes.h:841
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:717
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1299
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1304
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low lanes of an integer vector.
Definition: ISDOpcodes.h:871
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1338
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to memory with one type and loaded from the same address with the other type.
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1339
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:418
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1490
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:465
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the resultant vector type.
Definition: ISDOpcodes.h:635
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1292
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:334
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1341
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1308
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant, which is required to be operand #1) half of the integer or float value specified as operand #0.
Definition: ISDOpcodes.h:229
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:642
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:330
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:445
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1444
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to +inf, 3 Round to -inf, 4 Round to nearest (ties to away).
Definition: ISDOpcodes.h:931
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of type iN containing the high bits of the result.
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1336
@ STRICT_FMAXIMUM
Definition: ISDOpcodes.h:450
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:439
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially variable) element number IDX.
Definition: ISDOpcodes.h:550
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:440
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1344
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ ConstantPool
Definition: ISDOpcodes.h:82
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low lanes of an integer vector.
Definition: ISDOpcodes.h:860
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in a large integer register.
Definition: ISDOpcodes.h:849
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ STRICT_FROUND
Definition: ISDOpcodes.h:443
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector result.
Definition: ISDOpcodes.h:766
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:310
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:464
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:442
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1334
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:458
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
Definition: ISDOpcodes.h:480
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:457
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0.0.
Definition: ISDOpcodes.h:1050
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1335
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1253
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic function with no side effects.
Definition: ISDOpcodes.h:190
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:407
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amt) These correspond to the atomicrmw instruction.
Definition: ISDOpcodes.h:1333
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
Definition: ISDOpcodes.h:920
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low lanes of an integer vector.
Definition: ISDOpcodes.h:882
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition: ISDOpcodes.h:958
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:438
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.setjmp intrinsic.
Definition: ISDOpcodes.h:147
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1141
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero or sign extended from a narrower type.
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:508
@ AssertZext
Definition: ISDOpcodes.h:62
@ STRICT_FRINT
Definition: ISDOpcodes.h:437
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1398
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target intrinsic function with side effects that returns a result.
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified, possibly variable, elements.
Definition: ISDOpcodes.h:530
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
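For instance, with concrete condition codes:

  ISD::CondCode CC = ISD::SETULT;
  ISD::CondCode Sw = ISD::getSetCCSwappedOperands(CC);    // (Y op X): SETUGT
  ISD::CondCode Inv = ISD::getSetCCInverse(CC, MVT::i32); // !(X op Y): SETUGE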
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1613
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1593
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
ID ArrayRef< Type * > Tys
Definition: Intrinsics.h:102
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition: SystemZ.h:40
static bool isImmHH(uint64_t Val)
Definition: SystemZ.h:176
const unsigned CCMASK_TEND
Definition: SystemZ.h:97
const unsigned CCMASK_CS_EQ
Definition: SystemZ.h:67
const unsigned CCMASK_TBEGIN
Definition: SystemZ.h:92
const unsigned CCMASK_0
Definition: SystemZ.h:27
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition: SystemZ.h:82
const unsigned CCMASK_LOGICAL_CARRY
Definition: SystemZ.h:60
const unsigned TDCMASK_NORMAL_MINUS
Definition: SystemZ.h:122
const unsigned CCMASK_TDC
Definition: SystemZ.h:109
const unsigned CCMASK_FCMP
Definition: SystemZ.h:48
const unsigned CCMASK_TM_SOME_0
Definition: SystemZ.h:81
static bool isImmHL(uint64_t Val)
Definition: SystemZ.h:171
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition: SystemZ.h:124
const unsigned PFD_READ
Definition: SystemZ.h:115
const unsigned CCMASK_1
Definition: SystemZ.h:28
const unsigned TDCMASK_NORMAL_PLUS
Definition: SystemZ.h:121
const unsigned PFD_WRITE
Definition: SystemZ.h:116
const unsigned CCMASK_CMP_GT
Definition: SystemZ.h:37
const unsigned TDCMASK_QNAN_MINUS
Definition: SystemZ.h:128
const unsigned CCMASK_CS
Definition: SystemZ.h:69
const unsigned CCMASK_ANY
Definition: SystemZ.h:31
const unsigned CCMASK_ARITH
Definition: SystemZ.h:55
const unsigned CCMASK_TM_MIXED_MSB_0
Definition: SystemZ.h:78
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition: SystemZ.h:123
static bool isImmLL(uint64_t Val)
Definition: SystemZ.h:161
const unsigned VectorBits
Definition: SystemZ.h:154
static bool isImmLH(uint64_t Val)
Definition: SystemZ.h:166
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition: SystemZ.h:125
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition: SystemZ.h:77
const unsigned IPM_CC
Definition: SystemZ.h:112
const unsigned CCMASK_CMP_LE
Definition: SystemZ.h:39
const unsigned CCMASK_CMP_O
Definition: SystemZ.h:44
const unsigned CCMASK_CMP_EQ
Definition: SystemZ.h:35
const unsigned VectorBytes
Definition: SystemZ.h:158
const unsigned TDCMASK_INFINITY_MINUS
Definition: SystemZ.h:126
const unsigned CCMASK_ICMP
Definition: SystemZ.h:47
const unsigned CCMASK_VCMP_ALL
Definition: SystemZ.h:101
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition: SystemZ.h:104
const unsigned CCMASK_TM_MIXED_MSB_1
Definition: SystemZ.h:79
const unsigned CCMASK_TM_MSB_0
Definition: SystemZ.h:83
const unsigned CCMASK_ARITH_OVERFLOW
Definition: SystemZ.h:54
const unsigned CCMASK_CS_NE
Definition: SystemZ.h:68
const unsigned TDCMASK_SNAN_PLUS
Definition: SystemZ.h:129
const unsigned CCMASK_TM
Definition: SystemZ.h:85
const unsigned CCMASK_3
Definition: SystemZ.h:30
const unsigned CCMASK_CMP_LT
Definition: SystemZ.h:36
const unsigned CCMASK_CMP_NE
Definition: SystemZ.h:38
const unsigned TDCMASK_ZERO_PLUS
Definition: SystemZ.h:119
const unsigned TDCMASK_QNAN_PLUS
Definition: SystemZ.h:127
const unsigned TDCMASK_ZERO_MINUS
Definition: SystemZ.h:120
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition: SystemZ.h:80
const unsigned CCMASK_LOGICAL_BORROW
Definition: SystemZ.h:62
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition: SystemZ.h:43
const unsigned CCMASK_LOGICAL
Definition: SystemZ.h:64
const unsigned CCMASK_TM_MSB_1
Definition: SystemZ.h:84
const unsigned TDCMASK_SNAN_MINUS
Definition: SystemZ.h:130
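These condition-code masks compose by bitwise OR, so compound relations are unions of the individual CC values (per the definitions in SystemZ.h):

  unsigned LE = SystemZ::CCMASK_CMP_EQ | SystemZ::CCMASK_CMP_LT;
  assert(LE == SystemZ::CCMASK_CMP_LE && "LE is the union of EQ and LT");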
@ GeneralDynamic
Definition: CodeGen.h:46
@ GS
Definition: X86.h:210
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
support::ulittle32_t Word
Definition: IRSymtab.h:52
NodeAddr< CodeNode * > Code
Definition: RDFGraph.h:388
constexpr const char32_t SBase
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:353
@ Offset
Definition: DWP.cpp:480
@ Length
Definition: DWP.cpp:480
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:255
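Concretely, with values chosen only for illustration:

  bool Fits = isUIntN(12, 4000);    // true: 4000 < 2^12 = 4096
  unsigned Bits = Log2_32_Ceil(20); // 5: 2^5 = 32 is the first power of two >= 20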
@ Done
Definition: Threading.h:61
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert 'Bytes' to a hex string and output to 'OS'.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant bit, stopping at the first 1.
Definition: bit.h:215
int countl_zero(T Val)
Count the number of 0s from the most significant bit to the least significant bit, stopping at the first 1.
Definition: bit.h:281
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
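Worked values for these bit helpers:

  bool P2 = isPowerOf2_32(64); // true
  int TZ = countr_zero(0x10u); // 4: four trailing zero bits
  int LZ = countl_zero(0x10u); // 27: leading zeros in a 32-bit value
  unsigned C = bit_ceil(20u);  // 32: smallest power of two >= 20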
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ Mul
Product of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:581
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
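Worked values for the two helpers above:

  int64_t Disp = SignExtend64<16>(0x8000); // -32768: bit 15 is the sign bit
  uint32_t Floor = bit_floor(20u);         // 16: largest power of two <= 20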
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define EQ(a, b)
Definition: regexec.c:112
#define NC
Definition: regutils.h:42
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:243
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:178
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:43
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:164
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:73
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:303
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:172
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:137
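A sketch of how these KnownBits operations combine, in the style of a computeKnownBitsForTargetNode override (KnownLHS and KnownRHS are assumed already computed for two values that may be selected between):

  KnownBits Known = KnownLHS.intersectWith(KnownRHS); // facts true of both
  Known = Known.zext(64);                             // track as a 64-bit value
  APInt Ceiling = Known.getMaxValue();                // largest consistent value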
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
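For example, a constant-pool load typically carries such pointer info (CPAddr is assumed to be the already-lowered constant-pool address):

  SDValue Load = DAG.getLoad(
      MVT::f64, DL, DAG.getEntryNode(), CPAddr,
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));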
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
const uint32_t * getNoPreservedMask() const override
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
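These setters chain fluently; the usual shape of a runtime call emitted through LowerCallTo (Callee, RetTy and Args are assumed prepared by the caller):

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
     .setChain(Chain)
     .setCallee(CallingConv::C, RetTy, Callee, std::move(Args))
     .setSExtResult();
  std::pair<SDValue, SDValue> R = LowerCallTo(CLI); // {return value, out chain}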