Bug Summary

File: build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
Warning: line 782, column 36
The result of the left shift is undefined due to shifting by '18446744073709551615', which is greater or equal to the width of type 'uint64_t'
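How the flagged shift amount arises, as a minimal standalone sketch (not part of the report): it assumes findFirstSet's documented default ZB_Max zero behavior, namely returning std::numeric_limits<uint64_t>::max() for a zero input, which is exactly the 18446744073709551615 quoted above.

    #include <cstdint>
    #include <cstdio>
    #include <limits>

    // Stand-in for llvm::findFirstSet<uint64_t> with its default ZB_Max
    // policy: index of the lowest set bit, or the type's maximum value
    // when the input is zero.
    static uint64_t findFirstSet(uint64_t Val) {
      if (Val == 0)
        return std::numeric_limits<uint64_t>::max();
      return static_cast<uint64_t>(__builtin_ctzll(Val));
    }

    int main() {
      uint64_t SplatBitsZ = 0; // no bits set, as on the analyzed path
      uint64_t Shift = findFirstSet(SplatBitsZ);
      // Prints 18446744073709551615; using it as a shift count, as line 782
      // of the analyzed file does, is undefined because it is >= the 64-bit
      // width of uint64_t.
      std::printf("shift amount: %llu\n", (unsigned long long)Shift);
    }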

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name SystemZISelLowering.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/SystemZ -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/SystemZ -I include -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o 
/tmp/scan-build-2022-10-03-140002-15933-1 -x c++ /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
15#include "SystemZConstantPoolValue.h"
16#include "SystemZMachineFunctionInfo.h"
17#include "SystemZTargetMachine.h"
18#include "llvm/CodeGen/CallingConvLower.h"
19#include "llvm/CodeGen/MachineInstrBuilder.h"
20#include "llvm/CodeGen/MachineRegisterInfo.h"
21#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
22#include "llvm/IR/IntrinsicInst.h"
23#include "llvm/IR/Intrinsics.h"
24#include "llvm/IR/IntrinsicsS390.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/KnownBits.h"
27#include <cctype>
28
29using namespace llvm;
30
31#define DEBUG_TYPE "systemz-lower"
32
33namespace {
34// Represents information about a comparison.
35struct Comparison {
36 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
37 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
38 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
39
40 // The operands to the comparison.
41 SDValue Op0, Op1;
42
43 // Chain if this is a strict floating-point comparison.
44 SDValue Chain;
45
46 // The opcode that should be used to compare Op0 and Op1.
47 unsigned Opcode;
48
49 // A SystemZICMP value. Only used for integer comparisons.
50 unsigned ICmpType;
51
52 // The mask of CC values that Opcode can produce.
53 unsigned CCValid;
54
55 // The mask of CC values for which the original condition is true.
56 unsigned CCMask;
57};
58} // end anonymous namespace
59
60// Classify VT as either 32 or 64 bit.
61static bool is32Bit(EVT VT) {
62 switch (VT.getSimpleVT().SimpleTy) {
63 case MVT::i32:
64 return true;
65 case MVT::i64:
66 return false;
67 default:
llvm_unreachable("Unsupported type");
69 }
70}
71
72// Return a version of MachineOperand that can be safely used before the
73// final use.
74static MachineOperand earlyUseOperand(MachineOperand Op) {
75 if (Op.isReg())
76 Op.setIsKill(false);
77 return Op;
78}
79
80SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
81 const SystemZSubtarget &STI)
82 : TargetLowering(TM), Subtarget(STI) {
83 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
84
85 auto *Regs = STI.getSpecialRegisters();
86
87 // Set up the register classes.
88 if (Subtarget.hasHighWord())
89 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
90 else
91 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
92 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
93 if (!useSoftFloat()) {
94 if (Subtarget.hasVector()) {
95 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
96 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
97 } else {
98 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
99 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
100 }
101 if (Subtarget.hasVectorEnhancements1())
102 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
103 else
104 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
105
106 if (Subtarget.hasVector()) {
107 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
108 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
109 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
110 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
111 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
112 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
113 }
114 }
115
116 // Compute derived properties from the register classes
117 computeRegisterProperties(Subtarget.getRegisterInfo());
118
119 // Set up special registers.
120 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
121
122 // TODO: It may be better to default to latency-oriented scheduling, however
123 // LLVM's current latency-oriented scheduler can't handle physreg definitions
124 // such as SystemZ has with CC, so set this to the register-pressure
125 // scheduler, because it can.
126 setSchedulingPreference(Sched::RegPressure);
127
128 setBooleanContents(ZeroOrOneBooleanContent);
129 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
130
131 // Instructions are strings of 2-byte aligned 2-byte values.
132 setMinFunctionAlignment(Align(2));
133 // For performance reasons we prefer 16-byte alignment.
134 setPrefFunctionAlignment(Align(16));
135
136 // Handle operations that are handled in a similar way for all types.
137 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
138 I <= MVT::LAST_FP_VALUETYPE;
139 ++I) {
140 MVT VT = MVT::SimpleValueType(I);
141 if (isTypeLegal(VT)) {
142 // Lower SET_CC into an IPM-based sequence.
143 setOperationAction(ISD::SETCC, VT, Custom);
144 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
145 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
146
147 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
148 setOperationAction(ISD::SELECT, VT, Expand);
149
150 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
151 setOperationAction(ISD::SELECT_CC, VT, Custom);
152 setOperationAction(ISD::BR_CC, VT, Custom);
153 }
154 }
155
156 // Expand jump table branches as address arithmetic followed by an
157 // indirect jump.
158 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
159
160 // Expand BRCOND into a BR_CC (see above).
161 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
162
163 // Handle integer types.
164 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
165 I <= MVT::LAST_INTEGER_VALUETYPE;
166 ++I) {
167 MVT VT = MVT::SimpleValueType(I);
168 if (isTypeLegal(VT)) {
169 setOperationAction(ISD::ABS, VT, Legal);
170
171 // Expand individual DIV and REMs into DIVREMs.
172 setOperationAction(ISD::SDIV, VT, Expand);
173 setOperationAction(ISD::UDIV, VT, Expand);
174 setOperationAction(ISD::SREM, VT, Expand);
175 setOperationAction(ISD::UREM, VT, Expand);
176 setOperationAction(ISD::SDIVREM, VT, Custom);
177 setOperationAction(ISD::UDIVREM, VT, Custom);
178
179 // Support addition/subtraction with overflow.
180 setOperationAction(ISD::SADDO, VT, Custom);
181 setOperationAction(ISD::SSUBO, VT, Custom);
182
183 // Support addition/subtraction with carry.
184 setOperationAction(ISD::UADDO, VT, Custom);
185 setOperationAction(ISD::USUBO, VT, Custom);
186
187 // Support carry in as value rather than glue.
188 setOperationAction(ISD::ADDCARRY, VT, Custom);
189 setOperationAction(ISD::SUBCARRY, VT, Custom);
190
191 // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
192 // stores, putting a serialization instruction after the stores.
193 setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
194 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
195
196 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
197 // available, or if the operand is constant.
198 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
199
200 // Use POPCNT on z196 and above.
201 if (Subtarget.hasPopulationCount())
202 setOperationAction(ISD::CTPOP, VT, Custom);
203 else
204 setOperationAction(ISD::CTPOP, VT, Expand);
205
206 // No special instructions for these.
207 setOperationAction(ISD::CTTZ, VT, Expand);
208 setOperationAction(ISD::ROTR, VT, Expand);
209
210 // Use *MUL_LOHI where possible instead of MULH*.
211 setOperationAction(ISD::MULHS, VT, Expand);
212 setOperationAction(ISD::MULHU, VT, Expand);
213 setOperationAction(ISD::SMUL_LOHI, VT, Custom);
214 setOperationAction(ISD::UMUL_LOHI, VT, Custom);
215
216 // Only z196 and above have native support for conversions to unsigned.
217 // On z10, promoting to i64 doesn't generate an inexact condition for
218 // values that are outside the i32 range but in the i64 range, so use
219 // the default expansion.
220 if (!Subtarget.hasFPExtension())
221 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
222
223 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
224 // default to Expand, so need to be modified to Legal where appropriate.
225 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
226 if (Subtarget.hasFPExtension())
227 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);
228
229 // And similarly for STRICT_[SU]INT_TO_FP.
230 setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal);
231 if (Subtarget.hasFPExtension())
232 setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal);
233 }
234 }
235
236 // Type legalization will convert 8- and 16-bit atomic operations into
237 // forms that operate on i32s (but still keeping the original memory VT).
238 // Lower them into full i32 operations.
239 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
240 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
241 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
242 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
243 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
244 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
245 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
246 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
247 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
248 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
249 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
250
251 // Even though i128 is not a legal type, we still need to custom lower
252 // the atomic operations in order to exploit SystemZ instructions.
253 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
254 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
255
256 // We can use the CC result of compare-and-swap to implement
257 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
258 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
259 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
260 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
261
262 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
263
264 // Traps are legal, as we will convert them to "j .+2".
265 setOperationAction(ISD::TRAP, MVT::Other, Legal);
266
267 // z10 has instructions for signed but not unsigned FP conversion.
268 // Handle unsigned 32-bit types as signed 64-bit types.
269 if (!Subtarget.hasFPExtension()) {
270 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
271 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
272 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote);
273 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
274 }
275
276 // We have native support for a 64-bit CTLZ, via FLOGR.
277 setOperationAction(ISD::CTLZ, MVT::i32, Promote);
278 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
279 setOperationAction(ISD::CTLZ, MVT::i64, Legal);
280
281 // On z15 we have native support for a 64-bit CTPOP.
282 if (Subtarget.hasMiscellaneousExtensions3()) {
283 setOperationAction(ISD::CTPOP, MVT::i32, Promote);
284 setOperationAction(ISD::CTPOP, MVT::i64, Legal);
285 }
286
287 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
288 setOperationAction(ISD::OR, MVT::i64, Custom);
289
290 // Expand 128 bit shifts without using a libcall.
291 setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
292 setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
293 setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
294 setLibcallName(RTLIB::SRL_I128, nullptr);
295 setLibcallName(RTLIB::SHL_I128, nullptr);
296 setLibcallName(RTLIB::SRA_I128, nullptr);
297
298 // Handle bitcast from fp128 to i128.
299 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
300
301 // We have native instructions for i8, i16 and i32 extensions, but not i1.
302 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
303 for (MVT VT : MVT::integer_valuetypes()) {
304 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
305 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
306 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
307 }
308
309 // Handle the various types of symbolic address.
310 setOperationAction(ISD::ConstantPool, PtrVT, Custom);
311 setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
312 setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
313 setOperationAction(ISD::BlockAddress, PtrVT, Custom);
314 setOperationAction(ISD::JumpTable, PtrVT, Custom);
315
316 // We need to handle dynamic allocations specially because of the
317 // 160-byte area at the bottom of the stack.
318 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
319 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
320
321 setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
322 setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
323
324 // Handle prefetches with PFD or PFDRL.
325 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
326
327 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
328 // Assume by default that all vector operations need to be expanded.
329 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
330 if (getOperationAction(Opcode, VT) == Legal)
331 setOperationAction(Opcode, VT, Expand);
332
333 // Likewise all truncating stores and extending loads.
334 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
335 setTruncStoreAction(VT, InnerVT, Expand);
336 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
337 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
338 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
339 }
340
341 if (isTypeLegal(VT)) {
342 // These operations are legal for anything that can be stored in a
343 // vector register, even if there is no native support for the format
344 // as such. In particular, we can do these for v4f32 even though there
345 // are no specific instructions for that format.
346 setOperationAction(ISD::LOAD, VT, Legal);
347 setOperationAction(ISD::STORE, VT, Legal);
348 setOperationAction(ISD::VSELECT, VT, Legal);
349 setOperationAction(ISD::BITCAST, VT, Legal);
350 setOperationAction(ISD::UNDEF, VT, Legal);
351
352 // Likewise, except that we need to replace the nodes with something
353 // more specific.
354 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
355 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
356 }
357 }
358
359 // Handle integer vector types.
360 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
361 if (isTypeLegal(VT)) {
362 // These operations have direct equivalents.
363 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
364 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
365 setOperationAction(ISD::ADD, VT, Legal);
366 setOperationAction(ISD::SUB, VT, Legal);
367 if (VT != MVT::v2i64)
368 setOperationAction(ISD::MUL, VT, Legal);
369 setOperationAction(ISD::ABS, VT, Legal);
370 setOperationAction(ISD::AND, VT, Legal);
371 setOperationAction(ISD::OR, VT, Legal);
372 setOperationAction(ISD::XOR, VT, Legal);
373 if (Subtarget.hasVectorEnhancements1())
374 setOperationAction(ISD::CTPOP, VT, Legal);
375 else
376 setOperationAction(ISD::CTPOP, VT, Custom);
377 setOperationAction(ISD::CTTZ, VT, Legal);
378 setOperationAction(ISD::CTLZ, VT, Legal);
379
380 // Convert a GPR scalar to a vector by inserting it into element 0.
381 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
382
383 // Use a series of unpacks for extensions.
384 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
385 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
386
387 // Detect shifts by a scalar amount and convert them into
388 // V*_BY_SCALAR.
389 setOperationAction(ISD::SHL, VT, Custom);
390 setOperationAction(ISD::SRA, VT, Custom);
391 setOperationAction(ISD::SRL, VT, Custom);
392
393 // At present ROTL isn't matched by DAGCombiner. ROTR should be
394 // converted into ROTL.
395 setOperationAction(ISD::ROTL, VT, Expand);
396 setOperationAction(ISD::ROTR, VT, Expand);
397
398 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
399 // and inverting the result as necessary.
400 setOperationAction(ISD::SETCC, VT, Custom);
401 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
402 if (Subtarget.hasVectorEnhancements1())
403 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
404 }
405 }
406
407 if (Subtarget.hasVector()) {
408 // There should be no need to check for float types other than v2f64
409 // since <2 x f32> isn't a legal type.
410 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
411 setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
412 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
413 setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
414 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
415 setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
416 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
417 setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
418
419 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
420 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
421 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
422 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
423 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
424 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal);
425 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
426 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal);
427 }
428
429 if (Subtarget.hasVectorEnhancements2()) {
430 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
431 setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
432 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
433 setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
434 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
435 setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
436 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
437 setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);
438
439 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
440 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
441 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
442 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
443 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
444 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal);
445 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
446 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal);
447 }
448
449 // Handle floating-point types.
450 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
451 I <= MVT::LAST_FP_VALUETYPE;
452 ++I) {
453 MVT VT = MVT::SimpleValueType(I);
454 if (isTypeLegal(VT)) {
455 // We can use FI for FRINT.
456 setOperationAction(ISD::FRINT, VT, Legal);
457
458 // We can use the extended form of FI for other rounding operations.
459 if (Subtarget.hasFPExtension()) {
460 setOperationAction(ISD::FNEARBYINT, VT, Legal);
461 setOperationAction(ISD::FFLOOR, VT, Legal);
462 setOperationAction(ISD::FCEIL, VT, Legal);
463 setOperationAction(ISD::FTRUNC, VT, Legal);
464 setOperationAction(ISD::FROUND, VT, Legal);
465 }
466
467 // No special instructions for these.
468 setOperationAction(ISD::FSIN, VT, Expand);
469 setOperationAction(ISD::FCOS, VT, Expand);
470 setOperationAction(ISD::FSINCOS, VT, Expand);
471 setOperationAction(ISD::FREM, VT, Expand);
472 setOperationAction(ISD::FPOW, VT, Expand);
473
474 // Special treatment.
475 setOperationAction(ISD::IS_FPCLASS, VT, Custom);
476
477 // Handle constrained floating-point operations.
478 setOperationAction(ISD::STRICT_FADD, VT, Legal);
479 setOperationAction(ISD::STRICT_FSUB, VT, Legal);
480 setOperationAction(ISD::STRICT_FMUL, VT, Legal);
481 setOperationAction(ISD::STRICT_FDIV, VT, Legal);
482 setOperationAction(ISD::STRICT_FMA, VT, Legal);
483 setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
484 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
485 setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
486 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
487 if (Subtarget.hasFPExtension()) {
488 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
489 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
490 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
491 setOperationAction(ISD::STRICT_FROUND, VT, Legal);
492 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
493 }
494 }
495 }
496
497 // Handle floating-point vector types.
498 if (Subtarget.hasVector()) {
499 // Scalar-to-vector conversion is just a subreg.
500 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
501 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
502
503 // Some insertions and extractions can be done directly but others
504 // need to go via integers.
505 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
506 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
507 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
508 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
509
510 // These operations have direct equivalents.
511 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
512 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
513 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
514 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
515 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
516 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
517 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
518 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
519 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
520 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
521 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
522 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
523 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
524 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
525
526 // Handle constrained floating-point operations.
527 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
528 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
529 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
530 setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
531 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
532 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
533 setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
534 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
535 setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
536 setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
537 setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
538 setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
539 }
540
541 // The vector enhancements facility 1 has instructions for these.
542 if (Subtarget.hasVectorEnhancements1()) {
543 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
544 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
545 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
546 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
547 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
548 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
549 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
550 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
551 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
552 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
553 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
554 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
555 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
556 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
557
558 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
559 setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
560 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
561 setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);
562
563 setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
564 setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
565 setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
566 setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);
567
568 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
569 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
570 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
571 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
572
573 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
574 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
575 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
576 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
577
578 setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
579 setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
580 setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
581 setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
582
583 // Handle constrained floating-point operations.
584 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
585 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
586 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
587 setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
588 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
589 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
590 setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
591 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
592 setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
593 setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
594 setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
595 setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
596 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
597 MVT::v4f32, MVT::v2f64 }) {
598 setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
599 setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
600 setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal);
601 setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal);
602 }
603 }
604
605 // We only have fused f128 multiply-addition on vector registers.
606 if (!Subtarget.hasVectorEnhancements1()) {
607 setOperationAction(ISD::FMA, MVT::f128, Expand);
608 setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
609 }
610
611 // We don't have a copysign instruction on vector registers.
612 if (Subtarget.hasVectorEnhancements1())
613 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
614
615 // Needed so that we don't try to implement f128 constant loads using
616 // a load-and-extend of a f80 constant (in cases where the constant
617 // would fit in an f80).
618 for (MVT VT : MVT::fp_valuetypes())
619 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
620
621 // We don't have extending load instruction on vector registers.
622 if (Subtarget.hasVectorEnhancements1()) {
623 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
624 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
625 }
626
627 // Floating-point truncation and stores need to be done separately.
628 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
629 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
630 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
631
632 // We have 64-bit FPR<->GPR moves, but need special handling for
633 // 32-bit forms.
634 if (!Subtarget.hasVector()) {
635 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
636 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
637 }
638
639 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
640 // structure, but VAEND is a no-op.
641 setOperationAction(ISD::VASTART, MVT::Other, Custom);
642 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
643 setOperationAction(ISD::VAEND, MVT::Other, Expand);
644
645 // Codes for which we want to perform some z-specific combinations.
646 setTargetDAGCombine({ISD::ZERO_EXTEND,
647 ISD::SIGN_EXTEND,
648 ISD::SIGN_EXTEND_INREG,
649 ISD::LOAD,
650 ISD::STORE,
651 ISD::VECTOR_SHUFFLE,
652 ISD::EXTRACT_VECTOR_ELT,
653 ISD::FP_ROUND,
654 ISD::STRICT_FP_ROUND,
655 ISD::FP_EXTEND,
656 ISD::SINT_TO_FP,
657 ISD::UINT_TO_FP,
658 ISD::STRICT_FP_EXTEND,
659 ISD::BSWAP,
660 ISD::SDIV,
661 ISD::UDIV,
662 ISD::SREM,
663 ISD::UREM,
664 ISD::INTRINSIC_VOID,
665 ISD::INTRINSIC_W_CHAIN});
666
667 // Handle intrinsics.
668 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
669 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
670
671 // We want to use MVC in preference to even a single load/store pair.
672 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
673 MaxStoresPerMemcpyOptSize = 0;
674
675 // The main memset sequence is a byte store followed by an MVC.
676 // Two STC or MV..I stores win over that, but the kind of fused stores
677 // generated by target-independent code don't when the byte value is
678 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
679 // than "STC;MVC". Handle the choice in target-specific code instead.
680 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
681 MaxStoresPerMemsetOptSize = 0;
682
683 // Default to having -disable-strictnode-mutation on
684 IsStrictFPEnabled = true;
685}
686
687bool SystemZTargetLowering::useSoftFloat() const {
688 return Subtarget.hasSoftFloat();
689}
690
691EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
692 LLVMContext &, EVT VT) const {
693 if (!VT.isVector())
694 return MVT::i32;
695 return VT.changeVectorElementTypeToInteger();
696}
697
698bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
699 const MachineFunction &MF, EVT VT) const {
700 VT = VT.getScalarType();
701
702 if (!VT.isSimple())
703 return false;
704
705 switch (VT.getSimpleVT().SimpleTy) {
706 case MVT::f32:
707 case MVT::f64:
708 return true;
709 case MVT::f128:
710 return Subtarget.hasVectorEnhancements1();
711 default:
712 break;
713 }
714
715 return false;
716}
717
718// Return true if the constant can be generated with a vector instruction,
719// such as VGM, VGMB or VREPI.
720bool SystemZVectorConstantInfo::isVectorConstantLegal(
721 const SystemZSubtarget &Subtarget) {
722 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
723 if (!Subtarget.hasVector() ||
    [3] Assuming the condition is false
724     (isFP128 && !Subtarget.hasVectorEnhancements1()))
    [3.1] Field 'isFP128' is false
725 return false;
726
727 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
728 // preferred way of creating all-zero and all-one vectors so give it
729 // priority over other methods below.
730 unsigned Mask = 0;
731 unsigned I = 0;
732 for (; I < SystemZ::VectorBytes; ++I) {
    [4] Loop condition is true. Entering loop body
733 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
734 if (Byte == 0xff)
    [5] Assuming 'Byte' is not equal to 255
    [6] Taking false branch
735 Mask |= 1ULL << I;
736 else if (Byte != 0)
    [7] Assuming 'Byte' is not equal to 0
    [8] Taking true branch
737 break;
738 }
739 if (I == SystemZ::VectorBytes) {
    [9] Execution continues on line 739
    [9.1] 'I' is not equal to 'VectorBytes'
    [10] Taking false branch
740 Opcode = SystemZISD::BYTE_MASK;
741 OpVals.push_back(Mask);
742 VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
743 return true;
744 }
745
746 if (SplatBitSize > 64)
    [10.1] Field 'SplatBitSize' is <= 64
    [11] Taking false branch
747 return false;
748
749 auto tryValue = [&](uint64_t Value) -> bool {
750 // Try VECTOR REPLICATE IMMEDIATE
751 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
752 if (isInt<16>(SignedValue)) {
753 OpVals.push_back(((unsigned) SignedValue));
754 Opcode = SystemZISD::REPLICATE;
755 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
756 SystemZ::VectorBits / SplatBitSize);
757 return true;
758 }
759 // Try VECTOR GENERATE MASK
760 unsigned Start, End;
761 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
762 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
763 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
764 // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
765 OpVals.push_back(Start - (64 - SplatBitSize));
766 OpVals.push_back(End - (64 - SplatBitSize));
767 Opcode = SystemZISD::ROTATE_MASK;
768 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
769 SystemZ::VectorBits / SplatBitSize);
770 return true;
771 }
772 return false;
773 };
774
775 // First try assuming that any undefined bits above the highest set bit
776 // and below the lowest set bit are 1s. This increases the likelihood of
777 // being able to use a sign-extended element value in VECTOR REPLICATE
778 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
779 uint64_t SplatBitsZ = SplatBits.getZExtValue();
780 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
781 uint64_t Lower =
782 (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
    [12] Calling 'findFirstSet<unsigned long>'
    [19] Returning from 'findFirstSet<unsigned long>'
    [20] The result of the left shift is undefined due to shifting by '18446744073709551615', which is greater or equal to the width of type 'uint64_t'
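    [note added in this edit] findFirstSet() in llvm/Support/MathExtras.h
    defaults to the ZB_Max zero behavior, returning
    std::numeric_limits<uint64_t>::max() (18446744073709551615) for a zero
    argument; on the analyzed path SplatBitsZ is zero, so that value becomes
    the shift count flagged here.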
783 uint64_t Upper =
784 (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
785 if (tryValue(SplatBitsZ | Upper | Lower))
786 return true;
787
788 // Now try assuming that any undefined bits between the first and
789 // last defined set bits are set. This increases the chances of
790 // using a non-wraparound mask.
791 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
792 return tryValue(SplatBitsZ | Middle);
793}
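One way the shift could be guarded, sketched against the function above. This reuses the local names (SplatBitsZ, SplatUndefZ, Lower, Upper) and is an illustrative hardening only, not necessarily the upstream fix:

    // Hypothetical guard: skip both shifts when SplatBitsZ is zero, since
    // findFirstSet(0) and findLastSet(0) return UINT64_MAX and shifting a
    // uint64_t by that amount is undefined behavior.
    uint64_t Lower = 0, Upper = 0;
    if (SplatBitsZ != 0) {
      Lower = SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1);
      Upper = SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1);
    }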
794
795SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) {
796 if (IntImm.isSingleWord()) {
797 IntBits = APInt(128, IntImm.getZExtValue());
798 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
799 } else
800 IntBits = IntImm;
801 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
802
803 // Find the smallest splat.
804 SplatBits = IntImm;
805 unsigned Width = SplatBits.getBitWidth();
806 while (Width > 8) {
807 unsigned HalfSize = Width / 2;
808 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
809 APInt LowValue = SplatBits.trunc(HalfSize);
810
811 // If the two halves do not match, stop here.
812 if (HighValue != LowValue || 8 > HalfSize)
813 break;
814
815 SplatBits = HighValue;
816 Width = HalfSize;
817 }
818 SplatUndef = 0;
819 SplatBitSize = Width;
820}
821
822SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
823 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR")(static_cast <bool> (BVN->isConstant() && "Expected a constant BUILD_VECTOR"
) ? void (0) : __assert_fail ("BVN->isConstant() && \"Expected a constant BUILD_VECTOR\""
, "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp", 823, __extension__
__PRETTY_FUNCTION__))
;
824 bool HasAnyUndefs;
825
826 // Get IntBits by finding the 128 bit splat.
827 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
828 true);
829
830 // Get SplatBits by finding the 8 bit or greater splat.
831 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
832 true);
833}
834
835bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
836 bool ForCodeSize) const {
837 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
838 if (Imm.isZero() || Imm.isNegZero())
    [1] Taking false branch
839 return true;
840
841 return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
    [2] Calling 'SystemZVectorConstantInfo::isVectorConstantLegal'
842}
843
844/// Returns true if stack probing through inline assembly is requested.
845bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
846 // If the function specifically requests inline stack probes, emit them.
847 if (MF.getFunction().hasFnAttribute("probe-stack"))
848 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
849 "inline-asm";
850 return false;
851}
852
853bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
854 // We can use CGFI or CLGFI.
855 return isInt<32>(Imm) || isUInt<32>(Imm);
856}
857
858bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
859 // We can use ALGFI or SLGFI.
860 return isUInt<32>(Imm) || isUInt<32>(-Imm);
861}
862
863bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
864 EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {
865 // Unaligned accesses should never be slower than the expanded version.
866 // We check specifically for aligned accesses in the few cases where
867 // they are required.
868 if (Fast)
869 *Fast = true;
870 return true;
871}
872
873// Information about the addressing mode for a memory access.
874struct AddressingMode {
875 // True if a long displacement is supported.
876 bool LongDisplacement;
877
878 // True if use of index register is supported.
879 bool IndexReg;
880
881 AddressingMode(bool LongDispl, bool IdxReg) :
882 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
883};
884
885// Return the desired addressing mode for a Load which has only one use (in
886// the same block) which is a Store.
887static AddressingMode getLoadStoreAddrMode(bool HasVector,
888 Type *Ty) {
889 // With vector support a Load->Store combination may be combined to either
890 // an MVC or vector operations and it seems to work best to allow the
891 // vector addressing mode.
892 if (HasVector)
893 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
894
895 // Otherwise only the MVC case is special.
896 bool MVC = Ty->isIntegerTy(8);
897 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
898}
899
900// Return the addressing mode which seems most desirable given an LLVM
901// Instruction pointer.
902static AddressingMode
903supportedAddressingMode(Instruction *I, bool HasVector) {
904 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
905 switch (II->getIntrinsicID()) {
906 default: break;
907 case Intrinsic::memset:
908 case Intrinsic::memmove:
909 case Intrinsic::memcpy:
910 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
911 }
912 }
913
914 if (isa<LoadInst>(I) && I->hasOneUse()) {
915 auto *SingleUser = cast<Instruction>(*I->user_begin());
916 if (SingleUser->getParent() == I->getParent()) {
917 if (isa<ICmpInst>(SingleUser)) {
918 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
919 if (C->getBitWidth() <= 64 &&
920 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
921 // Comparison of memory with 16 bit signed / unsigned immediate
922 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
923 } else if (isa<StoreInst>(SingleUser))
924 // Load->Store
925 return getLoadStoreAddrMode(HasVector, I->getType());
926 }
927 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
928 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
929 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
930 // Load->Store
931 return getLoadStoreAddrMode(HasVector, LoadI->getType());
932 }
933
934 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
935
936 // * Use LDE instead of LE/LEY for z13 to avoid partial register
937 // dependencies (LDE only supports small offsets).
938 // * Utilize the vector registers to hold floating point
939 // values (vector load / store instructions only support small
940 // offsets).
941
942 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
943 I->getOperand(0)->getType());
944 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
945 bool IsVectorAccess = MemAccessTy->isVectorTy();
946
947 // A store of an extracted vector element will be combined into a VSTE type
948 // instruction.
949 if (!IsVectorAccess && isa<StoreInst>(I)) {
950 Value *DataOp = I->getOperand(0);
951 if (isa<ExtractElementInst>(DataOp))
952 IsVectorAccess = true;
953 }
954
955 // A load which gets inserted into a vector element will be combined into a
956 // VLE type instruction.
957 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
958 User *LoadUser = *I->user_begin();
959 if (isa<InsertElementInst>(LoadUser))
960 IsVectorAccess = true;
961 }
962
963 if (IsFPAccess || IsVectorAccess)
964 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
965 }
966
967 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
968}
969
970bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
971 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
972 // Punt on globals for now, although they can be used in limited
973 // RELATIVE LONG cases.
974 if (AM.BaseGV)
975 return false;
976
977 // Require a 20-bit signed offset.
978 if (!isInt<20>(AM.BaseOffs))
979 return false;
980
981 bool RequireD12 = Subtarget.hasVector() && Ty->isVectorTy();
982 AddressingMode SupportedAM(!RequireD12, true);
983 if (I != nullptr)
984 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
985
986 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
987 return false;
988
989 if (!SupportedAM.IndexReg)
990 // No indexing allowed.
991 return AM.Scale == 0;
992 else
993 // Indexing is OK but no scale factor can be applied.
994 return AM.Scale == 0 || AM.Scale == 1;
995}
996
997bool SystemZTargetLowering::findOptimalMemOpLowering(
998 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
999 unsigned SrcAS, const AttributeList &FuncAttributes) const {
1000 const int MVCFastLen = 16;
1001
1002 if (Limit != ~unsigned(0)) {
1003 // Don't expand Op into scalar loads/stores in these cases:
1004 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1005 return false; // Small memcpy: Use MVC
1006 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1007 return false; // Small memset (first byte with STC/MVI): Use MVC
1008 if (Op.isZeroMemset())
1009 return false; // Memset zero: Use XC
1010 }
1011
1012 return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
1013 SrcAS, FuncAttributes);
1014}
1015
1016EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op,
1017 const AttributeList &FuncAttributes) const {
1018 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1019}
1020
1021bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1022 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1023 return false;
1024 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedSize();
1025 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedSize();
1026 return FromBits > ToBits;
1027}
1028
1029bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
1030 if (!FromVT.isInteger() || !ToVT.isInteger())
1031 return false;
1032 unsigned FromBits = FromVT.getFixedSizeInBits();
1033 unsigned ToBits = ToVT.getFixedSizeInBits();
1034 return FromBits > ToBits;
1035}
1036
1037//===----------------------------------------------------------------------===//
1038// Inline asm support
1039//===----------------------------------------------------------------------===//
1040
1041TargetLowering::ConstraintType
1042SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
1043 if (Constraint.size() == 1) {
1044 switch (Constraint[0]) {
1045 case 'a': // Address register
1046 case 'd': // Data register (equivalent to 'r')
1047 case 'f': // Floating-point register
1048 case 'h': // High-part register
1049 case 'r': // General-purpose register
1050 case 'v': // Vector register
1051 return C_RegisterClass;
1052
1053 case 'Q': // Memory with base and unsigned 12-bit displacement
1054 case 'R': // Likewise, plus an index
1055 case 'S': // Memory with base and signed 20-bit displacement
1056 case 'T': // Likewise, plus an index
1057 case 'm': // Equivalent to 'T'.
1058 return C_Memory;
1059
1060 case 'I': // Unsigned 8-bit constant
1061 case 'J': // Unsigned 12-bit constant
1062 case 'K': // Signed 16-bit constant
1063 case 'L': // Signed 20-bit displacement (on all targets we support)
1064 case 'M': // 0x7fffffff
1065 return C_Immediate;
1066
1067 default:
1068 break;
1069 }
1070 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1071 switch (Constraint[1]) {
1072 case 'Q': // Address with base and unsigned 12-bit displacement
1073 case 'R': // Likewise, plus an index
1074 case 'S': // Address with base and signed 20-bit displacement
1075 case 'T': // Likewise, plus an index
1076 return C_Address;
1077
1078 default:
1079 break;
1080 }
1081 }
1082 return TargetLowering::getConstraintType(Constraint);
1083}
1084
1085TargetLowering::ConstraintWeight SystemZTargetLowering::
1086getSingleConstraintMatchWeight(AsmOperandInfo &info,
1087 const char *constraint) const {
1088 ConstraintWeight weight = CW_Invalid;
1089 Value *CallOperandVal = info.CallOperandVal;
1090 // If we don't have a value, we can't do a match,
1091 // but allow it at the lowest weight.
1092 if (!CallOperandVal)
1093 return CW_Default;
1094 Type *type = CallOperandVal->getType();
1095 // Look at the constraint type.
1096 switch (*constraint) {
1097 default:
1098 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
1099 break;
1100
1101 case 'a': // Address register
1102 case 'd': // Data register (equivalent to 'r')
1103 case 'h': // High-part register
1104 case 'r': // General-purpose register
1105 if (CallOperandVal->getType()->isIntegerTy())
1106 weight = CW_Register;
1107 break;
1108
1109 case 'f': // Floating-point register
1110 if (type->isFloatingPointTy())
1111 weight = CW_Register;
1112 break;
1113
1114 case 'v': // Vector register
1115 if ((type->isVectorTy() || type->isFloatingPointTy()) &&
1116 Subtarget.hasVector())
1117 weight = CW_Register;
1118 break;
1119
1120 case 'I': // Unsigned 8-bit constant
1121 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1122 if (isUInt<8>(C->getZExtValue()))
1123 weight = CW_Constant;
1124 break;
1125
1126 case 'J': // Unsigned 12-bit constant
1127 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1128 if (isUInt<12>(C->getZExtValue()))
1129 weight = CW_Constant;
1130 break;
1131
1132 case 'K': // Signed 16-bit constant
1133 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1134 if (isInt<16>(C->getSExtValue()))
1135 weight = CW_Constant;
1136 break;
1137
1138 case 'L': // Signed 20-bit displacement (on all targets we support)
1139 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1140 if (isInt<20>(C->getSExtValue()))
1141 weight = CW_Constant;
1142 break;
1143
1144 case 'M': // 0x7fffffff
1145 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1146 if (C->getZExtValue() == 0x7fffffff)
1147 weight = CW_Constant;
1148 break;
1149 }
1150 return weight;
1151}
1152
1153// Parse a "{tNNN}" register constraint for which the register type "t"
1154// has already been verified. MC is the class associated with "t" and
1155// Map maps 0-based register numbers to LLVM register numbers.
1156static std::pair<unsigned, const TargetRegisterClass *>
1157parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
1158 const unsigned *Map, unsigned Size) {
assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1160 if (isdigit(Constraint[2])) {
1161 unsigned Index;
1162 bool Failed =
1163 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1164 if (!Failed && Index < Size && Map[Index])
1165 return std::make_pair(Map[Index], RC);
1166 }
1167 return std::make_pair(0U, nullptr);
1168}
1169
1170std::pair<unsigned, const TargetRegisterClass *>
1171SystemZTargetLowering::getRegForInlineAsmConstraint(
1172 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1173 if (Constraint.size() == 1) {
1174 // GCC Constraint Letters
1175 switch (Constraint[0]) {
1176 default: break;
1177 case 'd': // Data register (equivalent to 'r')
1178 case 'r': // General-purpose register
1179 if (VT == MVT::i64)
1180 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1181 else if (VT == MVT::i128)
1182 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1183 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1184
1185 case 'a': // Address register
1186 if (VT == MVT::i64)
1187 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1188 else if (VT == MVT::i128)
1189 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1190 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1191
1192 case 'h': // High-part register (an LLVM extension)
1193 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1194
1195 case 'f': // Floating-point register
1196 if (!useSoftFloat()) {
1197 if (VT == MVT::f64)
1198 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1199 else if (VT == MVT::f128)
1200 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1201 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1202 }
1203 break;
1204 case 'v': // Vector register
1205 if (Subtarget.hasVector()) {
1206 if (VT == MVT::f32)
1207 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1208 if (VT == MVT::f64)
1209 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1210 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1211 }
1212 break;
1213 }
1214 }
1215 if (Constraint.size() > 0 && Constraint[0] == '{') {
1216 // We need to override the default register parsing for GPRs and FPRs
1217 // because the interpretation depends on VT. The internal names of
1218 // the registers are also different from the external names
1219 // (F0D and F0S instead of F0, etc.).
1220 if (Constraint[1] == 'r') {
1221 if (VT == MVT::i32)
1222 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1223 SystemZMC::GR32Regs, 16);
1224 if (VT == MVT::i128)
1225 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1226 SystemZMC::GR128Regs, 16);
1227 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1228 SystemZMC::GR64Regs, 16);
1229 }
1230 if (Constraint[1] == 'f') {
1231 if (useSoftFloat())
1232 return std::make_pair(
1233 0u, static_cast<const TargetRegisterClass *>(nullptr));
1234 if (VT == MVT::f32)
1235 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1236 SystemZMC::FP32Regs, 16);
1237 if (VT == MVT::f128)
1238 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1239 SystemZMC::FP128Regs, 16);
1240 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1241 SystemZMC::FP64Regs, 16);
1242 }
1243 if (Constraint[1] == 'v') {
1244 if (!Subtarget.hasVector())
1245 return std::make_pair(
1246 0u, static_cast<const TargetRegisterClass *>(nullptr));
1247 if (VT == MVT::f32)
1248 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1249 SystemZMC::VR32Regs, 32);
1250 if (VT == MVT::f64)
1251 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1252 SystemZMC::VR64Regs, 32);
1253 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1254 SystemZMC::VR128Regs, 32);
1255 }
1256 }
1257 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1258}
1259
1260// FIXME? Maybe this could be a TableGen attribute on some registers and
1261// this table could be generated automatically from RegInfo.
1262Register
1263SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
1264 const MachineFunction &MF) const {
1265 const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
1266
1267 Register Reg =
1268 StringSwitch<Register>(RegName)
1269 .Case("r4", Subtarget->isTargetXPLINK64() ? SystemZ::R4D : 0)
1270 .Case("r15", Subtarget->isTargetELF() ? SystemZ::R15D : 0)
1271 .Default(0);
1272
1273 if (Reg)
1274 return Reg;
1275 report_fatal_error("Invalid register name global variable");
1276}
1277
1278void SystemZTargetLowering::
1279LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1280 std::vector<SDValue> &Ops,
1281 SelectionDAG &DAG) const {
1282 // Only support length 1 constraints for now.
1283 if (Constraint.length() == 1) {
1284 switch (Constraint[0]) {
1285 case 'I': // Unsigned 8-bit constant
1286 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1287 if (isUInt<8>(C->getZExtValue()))
1288 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1289 Op.getValueType()));
1290 return;
1291
1292 case 'J': // Unsigned 12-bit constant
1293 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1294 if (isUInt<12>(C->getZExtValue()))
1295 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1296 Op.getValueType()));
1297 return;
1298
1299 case 'K': // Signed 16-bit constant
1300 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1301 if (isInt<16>(C->getSExtValue()))
1302 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1303 Op.getValueType()));
1304 return;
1305
1306 case 'L': // Signed 20-bit displacement (on all targets we support)
1307 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1308 if (isInt<20>(C->getSExtValue()))
1309 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1310 Op.getValueType()));
1311 return;
1312
1313 case 'M': // 0x7fffffff
1314 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1315 if (C->getZExtValue() == 0x7fffffff)
1316 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1317 Op.getValueType()));
1318 return;
1319 }
1320 }
1321 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1322}
1323
1324//===----------------------------------------------------------------------===//
1325// Calling conventions
1326//===----------------------------------------------------------------------===//
1327
1328#include "SystemZGenCallingConv.inc"
1329
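// R0, R1 and R14 are call-clobbered in the SystemZ calling conventions, so
// they are safe to use as scratch registers around a call sequence; the
// trailing 0 terminates the list.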
1330const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
1331 CallingConv::ID) const {
1332 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1333 SystemZ::R14D, 0 };
1334 return ScratchRegs;
1335}
1336
1337bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
1338 Type *ToType) const {
1339 return isTruncateFree(FromType, ToType);
1340}
1341
1342bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1343 return CI->isTailCall();
1344}
1345
1346// We do not yet support 128-bit single-element vector types. If the user
1347 // attempts to use such types as a function argument or return type, prefer
1348 // to error out instead of emitting code that violates the ABI.
1349static void VerifyVectorType(MVT VT, EVT ArgVT) {
1350 if (ArgVT.isVector() && !VT.isVector())
1351 report_fatal_error("Unsupported vector argument or return type");
1352}
1353
1354static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
1355 for (unsigned i = 0; i < Ins.size(); ++i)
1356 VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1357}
1358
1359static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
1360 for (unsigned i = 0; i < Outs.size(); ++i)
1361 VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1362}
1363
1364// Value is a value that has been passed to us in the location described by VA
1365// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1366// any loads onto Chain.
1367static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
1368 CCValAssign &VA, SDValue Chain,
1369 SDValue Value) {
1370 // If the argument has been promoted from a smaller type, insert an
1371 // assertion to capture this.
1372 if (VA.getLocInfo() == CCValAssign::SExt)
1373 Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1374 DAG.getValueType(VA.getValVT()));
1375 else if (VA.getLocInfo() == CCValAssign::ZExt)
1376 Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1377 DAG.getValueType(VA.getValVT()));
1378
1379 if (VA.isExtInLoc())
1380 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1381 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1382 // If this is a short vector argument loaded from the stack,
1383 // extend from i64 to full vector size and then bitcast.
1384    assert(VA.getLocVT() == MVT::i64);
1385    assert(VA.getValVT().isVector());
1386 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1387 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1388 } else
1389    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1390 return Value;
1391}
1392
1393// Value is a value of type VA.getValVT() that we need to copy into
1394// the location described by VA. Return a copy of Value converted to
1395 // VA.getLocVT(). The caller is responsible for handling indirect values.
1396static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
1397 CCValAssign &VA, SDValue Value) {
1398 switch (VA.getLocInfo()) {
1399 case CCValAssign::SExt:
1400 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1401 case CCValAssign::ZExt:
1402 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1403 case CCValAssign::AExt:
1404 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1405 case CCValAssign::BCvt: {
1406    assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1407    assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1408           VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1409 // For an f32 vararg we need to first promote it to an f64 and then
1410 // bitcast it to an i64.
1411 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1412 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1413 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1414 ? MVT::v2i64
1415 : VA.getLocVT();
1416 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1417 // For ELF, this is a short vector argument to be stored to the stack,
1418 // bitcast to v2i64 and then extract first element.
1419 if (BitCastToType == MVT::v2i64)
1420 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1421 DAG.getConstant(0, DL, MVT::i32));
1422 return Value;
1423 }
1424 case CCValAssign::Full:
1425 return Value;
1426 default:
1427    llvm_unreachable("Unhandled getLocInfo()");
1428 }
1429}
1430
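// Split an i128 value into its low and high i64 halves and combine them into
// the untyped GR128 register pair expected by the PAIR128 pseudo-instruction.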
1431static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
1432 SDLoc DL(In);
1433 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
1434 DAG.getIntPtrConstant(0, DL));
1435 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
1436 DAG.getIntPtrConstant(1, DL));
1437 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1438 MVT::Untyped, Hi, Lo);
1439 return SDValue(Pair, 0);
1440}
1441
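// The inverse of lowerI128ToGR128: extract the high and low i64 subregisters
// of a GR128 register pair and reassemble them into a single i128 value.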
1442static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
1443 SDLoc DL(In);
1444 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1445 DL, MVT::i64, In);
1446 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1447 DL, MVT::i64, In);
1448 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1449}
1450
1451bool SystemZTargetLowering::splitValueIntoRegisterParts(
1452 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1453 unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
1454 EVT ValueVT = Val.getValueType();
1455  assert((ValueVT != MVT::i128 ||
1456          ((NumParts == 1 && PartVT == MVT::Untyped) ||
1457           (NumParts == 2 && PartVT == MVT::i64))) &&
1458         "Unknown handling of i128 value.");
1459 if (ValueVT == MVT::i128 && NumParts == 1) {
1460 // Inline assembly operand.
1461 Parts[0] = lowerI128ToGR128(DAG, Val);
1462 return true;
1463 }
1464 return false;
1465}
1466
1467SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
1468 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1469 MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
1470  assert((ValueVT != MVT::i128 ||
1471          ((NumParts == 1 && PartVT == MVT::Untyped) ||
1472           (NumParts == 2 && PartVT == MVT::i64))) &&
1473         "Unknown handling of i128 value.");
1474 if (ValueVT == MVT::i128 && NumParts == 1)
1475 // Inline assembly operand.
1476 return lowerGR128ToI128(DAG, Parts[0]);
1477 return SDValue();
1478}
1479
1480SDValue SystemZTargetLowering::LowerFormalArguments(
1481 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1482 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1483 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1484 MachineFunction &MF = DAG.getMachineFunction();
1485 MachineFrameInfo &MFI = MF.getFrameInfo();
1486 MachineRegisterInfo &MRI = MF.getRegInfo();
1487 SystemZMachineFunctionInfo *FuncInfo =
1488 MF.getInfo<SystemZMachineFunctionInfo>();
1489 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1490 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1491
1492 // Detect unsupported vector argument types.
1493 if (Subtarget.hasVector())
1494 VerifyVectorTypes(Ins);
1495
1496 // Assign locations to all of the incoming arguments.
1497 SmallVector<CCValAssign, 16> ArgLocs;
1498 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1499 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1500
1501 unsigned NumFixedGPRs = 0;
1502 unsigned NumFixedFPRs = 0;
1503 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1504 SDValue ArgValue;
1505 CCValAssign &VA = ArgLocs[I];
1506 EVT LocVT = VA.getLocVT();
1507 if (VA.isRegLoc()) {
1508 // Arguments passed in registers
1509 const TargetRegisterClass *RC;
1510 switch (LocVT.getSimpleVT().SimpleTy) {
1511 default:
1512 // Integers smaller than i64 should be promoted to i64.
1513        llvm_unreachable("Unexpected argument type");
1514 case MVT::i32:
1515 NumFixedGPRs += 1;
1516 RC = &SystemZ::GR32BitRegClass;
1517 break;
1518 case MVT::i64:
1519 NumFixedGPRs += 1;
1520 RC = &SystemZ::GR64BitRegClass;
1521 break;
1522 case MVT::f32:
1523 NumFixedFPRs += 1;
1524 RC = &SystemZ::FP32BitRegClass;
1525 break;
1526 case MVT::f64:
1527 NumFixedFPRs += 1;
1528 RC = &SystemZ::FP64BitRegClass;
1529 break;
1530 case MVT::f128:
1531 NumFixedFPRs += 2;
1532 RC = &SystemZ::FP128BitRegClass;
1533 break;
1534 case MVT::v16i8:
1535 case MVT::v8i16:
1536 case MVT::v4i32:
1537 case MVT::v2i64:
1538 case MVT::v4f32:
1539 case MVT::v2f64:
1540 RC = &SystemZ::VR128BitRegClass;
1541 break;
1542 }
1543
1544 Register VReg = MRI.createVirtualRegister(RC);
1545 MRI.addLiveIn(VA.getLocReg(), VReg);
1546 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1547 } else {
1548      assert(VA.isMemLoc() && "Argument not register or memory");
1549
1550 // Create the frame index object for this incoming parameter.
1551      // FIXME: Pre-include the call frame size in the offset; we should
1552      // not need to manually add it here.
1553 int64_t ArgSPOffset = VA.getLocMemOffset();
1554 if (Subtarget.isTargetXPLINK64()) {
1555 auto &XPRegs =
1556 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
1557 ArgSPOffset += XPRegs.getCallFrameSize();
1558 }
1559 int FI =
1560 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1561
1562 // Create the SelectionDAG nodes corresponding to a load
1563 // from this parameter. Unpromoted ints and floats are
1564 // passed as right-justified 8-byte values.
1565 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1566 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1567 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1568 DAG.getIntPtrConstant(4, DL));
1569 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1570 MachinePointerInfo::getFixedStack(MF, FI));
1571 }
1572
1573 // Convert the value of the argument register into the value that's
1574 // being passed.
1575 if (VA.getLocInfo() == CCValAssign::Indirect) {
1576 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1577 MachinePointerInfo()));
1578 // If the original argument was split (e.g. i128), we need
1579 // to load all parts of it here (using the same address).
1580 unsigned ArgIndex = Ins[I].OrigArgIndex;
1581      assert (Ins[I].PartOffset == 0);
1582 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1583 CCValAssign &PartVA = ArgLocs[I + 1];
1584 unsigned PartOffset = Ins[I + 1].PartOffset;
1585 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1586 DAG.getIntPtrConstant(PartOffset, DL));
1587 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1588 MachinePointerInfo()));
1589 ++I;
1590 }
1591 } else
1592 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1593 }
1594
1595 // FIXME: Add support for lowering varargs for XPLINK64 in a later patch.
1596 if (IsVarArg && Subtarget.isTargetELF()) {
1597 // Save the number of non-varargs registers for later use by va_start, etc.
1598 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1599 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1600
1601 // Likewise the address (in the form of a frame index) of where the
1602 // first stack vararg would be. The 1-byte size here is arbitrary.
1603 int64_t StackSize = CCInfo.getNextStackOffset();
1604 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
1605
1606 // ...and a similar frame index for the caller-allocated save area
1607 // that will be used to store the incoming registers.
1608 int64_t RegSaveOffset =
1609 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1610 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1611 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1612
1613 // Store the FPR varargs in the reserved frame slots. (We store the
1614 // GPRs as part of the prologue.)
1615 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1616 SDValue MemOps[SystemZ::ELFNumArgFPRs];
1617 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1618 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1619 int FI =
1620 MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
1621 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1622 Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
1623 &SystemZ::FP64BitRegClass);
1624 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1625 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1626 MachinePointerInfo::getFixedStack(MF, FI));
1627 }
1628 // Join the stores, which are independent of one another.
1629 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1630 makeArrayRef(&MemOps[NumFixedFPRs],
1631 SystemZ::ELFNumArgFPRs-NumFixedFPRs));
1632 }
1633 }
1634
1635  // FIXME: For XPLINK64, add support for handling the incoming "ADA" special
1636  // register (R5).
1637 return Chain;
1638}
1639
1640static bool canUseSiblingCall(const CCState &ArgCCInfo,
1641 SmallVectorImpl<CCValAssign> &ArgLocs,
1642 SmallVectorImpl<ISD::OutputArg> &Outs) {
1643 // Punt if there are any indirect or stack arguments, or if the call
1644 // needs the callee-saved argument register R6, or if the call uses
1645 // the callee-saved register arguments SwiftSelf and SwiftError.
1646 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1647 CCValAssign &VA = ArgLocs[I];
1648 if (VA.getLocInfo() == CCValAssign::Indirect)
1649 return false;
1650 if (!VA.isRegLoc())
1651 return false;
1652 Register Reg = VA.getLocReg();
1653 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1654 return false;
1655 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1656 return false;
1657 }
1658 return true;
1659}
1660
1661SDValue
1662SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
1663 SmallVectorImpl<SDValue> &InVals) const {
1664 SelectionDAG &DAG = CLI.DAG;
1665 SDLoc &DL = CLI.DL;
1666 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1667 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1668 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1669 SDValue Chain = CLI.Chain;
1670 SDValue Callee = CLI.Callee;
1671 bool &IsTailCall = CLI.IsTailCall;
1672 CallingConv::ID CallConv = CLI.CallConv;
1673 bool IsVarArg = CLI.IsVarArg;
1674 MachineFunction &MF = DAG.getMachineFunction();
1675 EVT PtrVT = getPointerTy(MF.getDataLayout());
1676 LLVMContext &Ctx = *DAG.getContext();
1677 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
1678
1679 // FIXME: z/OS support to be added in later.
1680 if (Subtarget.isTargetXPLINK64())
1681 IsTailCall = false;
1682
1683 // Detect unsupported vector argument and return types.
1684 if (Subtarget.hasVector()) {
1685 VerifyVectorTypes(Outs);
1686 VerifyVectorTypes(Ins);
1687 }
1688
1689 // Analyze the operands of the call, assigning locations to each operand.
1690 SmallVector<CCValAssign, 16> ArgLocs;
1691 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1692 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1693
1694 // We don't support GuaranteedTailCallOpt, only automatically-detected
1695 // sibling calls.
1696 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1697 IsTailCall = false;
1698
1699 // Get a count of how many bytes are to be pushed on the stack.
1700 unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1701
1702 if (Subtarget.isTargetXPLINK64())
1703    // Although the XPLINK specifications for AMODE64 state that the minimum
1704    // size of the param area is 32 bytes and no rounding is otherwise
1705    // specified, we round this area up in 64-byte increments to be compatible
1706    // with existing compilers.
1707 NumBytes = std::max(64U, (unsigned)alignTo(NumBytes, 64));
1708
1709 // Mark the start of the call.
1710 if (!IsTailCall)
1711 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1712
1713 // Copy argument values to their designated locations.
1714 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
1715 SmallVector<SDValue, 8> MemOpChains;
1716 SDValue StackPtr;
1717 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1718 CCValAssign &VA = ArgLocs[I];
1719 SDValue ArgValue = OutVals[I];
1720
1721 if (VA.getLocInfo() == CCValAssign::Indirect) {
1722 // Store the argument in a stack slot and pass its address.
1723 unsigned ArgIndex = Outs[I].OrigArgIndex;
1724 EVT SlotVT;
1725 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1726 // Allocate the full stack space for a promoted (and split) argument.
1727 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1728 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1729 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1730 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1731 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1732 } else {
1733 SlotVT = Outs[I].ArgVT;
1734 }
1735 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1736 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1737 MemOpChains.push_back(
1738 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1739 MachinePointerInfo::getFixedStack(MF, FI)));
1740 // If the original argument was split (e.g. i128), we need
1741 // to store all parts of it here (and pass just one address).
1742      assert (Outs[I].PartOffset == 0);
1743 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1744 SDValue PartValue = OutVals[I + 1];
1745 unsigned PartOffset = Outs[I + 1].PartOffset;
1746 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1747 DAG.getIntPtrConstant(PartOffset, DL));
1748 MemOpChains.push_back(
1749 DAG.getStore(Chain, DL, PartValue, Address,
1750 MachinePointerInfo::getFixedStack(MF, FI)));
1751        assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1752                SlotVT.getStoreSize()) && "Not enough space for argument part!");
1753 ++I;
1754 }
1755 ArgValue = SpillSlot;
1756 } else
1757 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1758
1759 if (VA.isRegLoc()) {
1760 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
1761 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
1762 // and low values.
1763 if (VA.getLocVT() == MVT::i128)
1764 ArgValue = lowerI128ToGR128(DAG, ArgValue);
1765 // Queue up the argument copies and emit them at the end.
1766 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1767 } else {
1768      assert(VA.isMemLoc() && "Argument not register or memory");
1769
1770 // Work out the address of the stack slot. Unpromoted ints and
1771 // floats are passed as right-justified 8-byte values.
1772 if (!StackPtr.getNode())
1773 StackPtr = DAG.getCopyFromReg(Chain, DL,
1774 Regs->getStackPointerRegister(), PtrVT);
1775 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
1776 VA.getLocMemOffset();
1777 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1778 Offset += 4;
1779 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1780 DAG.getIntPtrConstant(Offset, DL));
1781
1782 // Emit the store.
1783 MemOpChains.push_back(
1784 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1785
1786      // Although long doubles or vectors are passed through the stack when
1787      // they are vararg (non-fixed) arguments, if a long double or vector
1788      // occupies the third and fourth slots of the argument list, GPR3 should
1789      // still shadow the third slot of the argument list.
1790 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
1791 SDValue ShadowArgValue =
1792 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
1793 DAG.getIntPtrConstant(1, DL));
1794 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
1795 }
1796 }
1797 }
1798
1799 // Join the stores, which are independent of one another.
1800 if (!MemOpChains.empty())
1801 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1802
1803 // Accept direct calls by converting symbolic call addresses to the
1804 // associated Target* opcodes. Force %r1 to be used for indirect
1805 // tail calls.
1806 SDValue Glue;
1807 // FIXME: Add support for XPLINK using the ADA register.
1808 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1809 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1810 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1811 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1812 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1813 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1814 } else if (IsTailCall) {
1815 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1816 Glue = Chain.getValue(1);
1817 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1818 }
1819
1820 // Build a sequence of copy-to-reg nodes, chained and glued together.
1821 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1822 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1823 RegsToPass[I].second, Glue);
1824 Glue = Chain.getValue(1);
1825 }
1826
1827 // The first call operand is the chain and the second is the target address.
1828 SmallVector<SDValue, 8> Ops;
1829 Ops.push_back(Chain);
1830 Ops.push_back(Callee);
1831
1832 // Add argument registers to the end of the list so that they are
1833 // known live into the call.
1834 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1835 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1836 RegsToPass[I].second.getValueType()));
1837
1838 // Add a register mask operand representing the call-preserved registers.
1839 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1840 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1841  assert(Mask && "Missing call preserved mask for calling convention");
1842 Ops.push_back(DAG.getRegisterMask(Mask));
1843
1844 // Glue the call to the argument copies, if any.
1845 if (Glue.getNode())
1846 Ops.push_back(Glue);
1847
1848 // Emit the call.
1849 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1850 if (IsTailCall)
1851 return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
1852 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
1853 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
1854 Glue = Chain.getValue(1);
1855
1856 // Mark the end of the call, which is glued to the call itself.
1857 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
1858 Glue = Chain.getValue(1);
1859
1860 // Assign locations to each value returned by this call.
1861 SmallVector<CCValAssign, 16> RetLocs;
1862 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
1863 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
1864
1865 // Copy all of the result registers out of their specified physreg.
1866 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1867 CCValAssign &VA = RetLocs[I];
1868
1869 // Copy the value out, gluing the copy to the end of the call sequence.
1870 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
1871 VA.getLocVT(), Glue);
1872 Chain = RetValue.getValue(1);
1873 Glue = RetValue.getValue(2);
1874
1875 // Convert the value of the return register into the value that's
1876 // being returned.
1877 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
1878 }
1879
1880 return Chain;
1881}
1882
1883// Generate a call taking the given operands as arguments and returning a
1884// result of type RetVT.
1885std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
1886 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
1887 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
1888 bool DoesNotReturn, bool IsReturnValueUsed) const {
1889 TargetLowering::ArgListTy Args;
1890 Args.reserve(Ops.size());
1891
1892 TargetLowering::ArgListEntry Entry;
1893 for (SDValue Op : Ops) {
1894 Entry.Node = Op;
1895 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1896 Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
1897 Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
1898 Args.push_back(Entry);
1899 }
1900
1901 SDValue Callee =
1902 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
1903
1904 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
1905 TargetLowering::CallLoweringInfo CLI(DAG);
1906 bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
1907 CLI.setDebugLoc(DL)
1908 .setChain(Chain)
1909 .setCallee(CallConv, RetTy, Callee, std::move(Args))
1910 .setNoReturn(DoesNotReturn)
1911 .setDiscardResult(!IsReturnValueUsed)
1912 .setSExtResult(SignExtend)
1913 .setZExtResult(!SignExtend);
1914 return LowerCallTo(CLI);
1915}
1916
1917bool SystemZTargetLowering::
1918CanLowerReturn(CallingConv::ID CallConv,
1919 MachineFunction &MF, bool isVarArg,
1920 const SmallVectorImpl<ISD::OutputArg> &Outs,
1921 LLVMContext &Context) const {
1922 // Detect unsupported vector return types.
1923 if (Subtarget.hasVector())
1924 VerifyVectorTypes(Outs);
1925
1926 // Special case that we cannot easily detect in RetCC_SystemZ since
1927 // i128 is not a legal type.
1928 for (auto &Out : Outs)
1929 if (Out.ArgVT == MVT::i128)
1930 return false;
1931
1932 SmallVector<CCValAssign, 16> RetLocs;
1933 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1934 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1935}
1936
1937SDValue
1938SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1939 bool IsVarArg,
1940 const SmallVectorImpl<ISD::OutputArg> &Outs,
1941 const SmallVectorImpl<SDValue> &OutVals,
1942 const SDLoc &DL, SelectionDAG &DAG) const {
1943 MachineFunction &MF = DAG.getMachineFunction();
1944
1945 // Detect unsupported vector return types.
1946 if (Subtarget.hasVector())
1947 VerifyVectorTypes(Outs);
1948
1949 // Assign locations to each returned value.
1950 SmallVector<CCValAssign, 16> RetLocs;
1951 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1952 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1953
1954 // Quick exit for void returns
1955 if (RetLocs.empty())
1956 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1957
1958 if (CallConv == CallingConv::GHC)
1959 report_fatal_error("GHC functions return void only");
1960
1961 // Copy the result values into the output registers.
1962 SDValue Glue;
1963 SmallVector<SDValue, 4> RetOps;
1964 RetOps.push_back(Chain);
1965 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1966 CCValAssign &VA = RetLocs[I];
1967 SDValue RetValue = OutVals[I];
1968
1969 // Make the return register live on exit.
1970    assert(VA.isRegLoc() && "Can only return in registers!");
1971
1972 // Promote the value as required.
1973 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1974
1975 // Chain and glue the copies together.
1976 Register Reg = VA.getLocReg();
1977 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1978 Glue = Chain.getValue(1);
1979 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1980 }
1981
1982 // Update chain and glue.
1983 RetOps[0] = Chain;
1984 if (Glue.getNode())
1985 RetOps.push_back(Glue);
1986
1987 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
1988}
1989
1990// Return true if Op is an intrinsic node with chain that returns the CC value
1991// as its only (other) argument. Provide the associated SystemZISD opcode and
1992// the mask of valid CC values if so.
1993static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1994 unsigned &CCValid) {
1995 unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1996 switch (Id) {
1997 case Intrinsic::s390_tbegin:
1998 Opcode = SystemZISD::TBEGIN;
1999 CCValid = SystemZ::CCMASK_TBEGIN;
2000 return true;
2001
2002 case Intrinsic::s390_tbegin_nofloat:
2003 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2004 CCValid = SystemZ::CCMASK_TBEGIN;
2005 return true;
2006
2007 case Intrinsic::s390_tend:
2008 Opcode = SystemZISD::TEND;
2009 CCValid = SystemZ::CCMASK_TEND;
2010 return true;
2011
2012 default:
2013 return false;
2014 }
2015}
2016
2017// Return true if Op is an intrinsic node without chain that returns the
2018// CC value as its final argument. Provide the associated SystemZISD
2019// opcode and the mask of valid CC values if so.
2020static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2021 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2022 switch (Id) {
2023 case Intrinsic::s390_vpkshs:
2024 case Intrinsic::s390_vpksfs:
2025 case Intrinsic::s390_vpksgs:
2026 Opcode = SystemZISD::PACKS_CC;
2027 CCValid = SystemZ::CCMASK_VCMP;
2028 return true;
2029
2030 case Intrinsic::s390_vpklshs:
2031 case Intrinsic::s390_vpklsfs:
2032 case Intrinsic::s390_vpklsgs:
2033 Opcode = SystemZISD::PACKLS_CC;
2034 CCValid = SystemZ::CCMASK_VCMP;
2035 return true;
2036
2037 case Intrinsic::s390_vceqbs:
2038 case Intrinsic::s390_vceqhs:
2039 case Intrinsic::s390_vceqfs:
2040 case Intrinsic::s390_vceqgs:
2041 Opcode = SystemZISD::VICMPES;
2042 CCValid = SystemZ::CCMASK_VCMP;
2043 return true;
2044
2045 case Intrinsic::s390_vchbs:
2046 case Intrinsic::s390_vchhs:
2047 case Intrinsic::s390_vchfs:
2048 case Intrinsic::s390_vchgs:
2049 Opcode = SystemZISD::VICMPHS;
2050 CCValid = SystemZ::CCMASK_VCMP;
2051 return true;
2052
2053 case Intrinsic::s390_vchlbs:
2054 case Intrinsic::s390_vchlhs:
2055 case Intrinsic::s390_vchlfs:
2056 case Intrinsic::s390_vchlgs:
2057 Opcode = SystemZISD::VICMPHLS;
2058 CCValid = SystemZ::CCMASK_VCMP;
2059 return true;
2060
2061 case Intrinsic::s390_vtm:
2062 Opcode = SystemZISD::VTM;
2063 CCValid = SystemZ::CCMASK_VCMP;
2064 return true;
2065
2066 case Intrinsic::s390_vfaebs:
2067 case Intrinsic::s390_vfaehs:
2068 case Intrinsic::s390_vfaefs:
2069 Opcode = SystemZISD::VFAE_CC;
2070 CCValid = SystemZ::CCMASK_ANY;
2071 return true;
2072
2073 case Intrinsic::s390_vfaezbs:
2074 case Intrinsic::s390_vfaezhs:
2075 case Intrinsic::s390_vfaezfs:
2076 Opcode = SystemZISD::VFAEZ_CC;
2077 CCValid = SystemZ::CCMASK_ANY;
2078 return true;
2079
2080 case Intrinsic::s390_vfeebs:
2081 case Intrinsic::s390_vfeehs:
2082 case Intrinsic::s390_vfeefs:
2083 Opcode = SystemZISD::VFEE_CC;
2084 CCValid = SystemZ::CCMASK_ANY;
2085 return true;
2086
2087 case Intrinsic::s390_vfeezbs:
2088 case Intrinsic::s390_vfeezhs:
2089 case Intrinsic::s390_vfeezfs:
2090 Opcode = SystemZISD::VFEEZ_CC;
2091 CCValid = SystemZ::CCMASK_ANY;
2092 return true;
2093
2094 case Intrinsic::s390_vfenebs:
2095 case Intrinsic::s390_vfenehs:
2096 case Intrinsic::s390_vfenefs:
2097 Opcode = SystemZISD::VFENE_CC;
2098 CCValid = SystemZ::CCMASK_ANY;
2099 return true;
2100
2101 case Intrinsic::s390_vfenezbs:
2102 case Intrinsic::s390_vfenezhs:
2103 case Intrinsic::s390_vfenezfs:
2104 Opcode = SystemZISD::VFENEZ_CC;
2105 CCValid = SystemZ::CCMASK_ANY;
2106 return true;
2107
2108 case Intrinsic::s390_vistrbs:
2109 case Intrinsic::s390_vistrhs:
2110 case Intrinsic::s390_vistrfs:
2111 Opcode = SystemZISD::VISTR_CC;
2112 CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
2113 return true;
2114
2115 case Intrinsic::s390_vstrcbs:
2116 case Intrinsic::s390_vstrchs:
2117 case Intrinsic::s390_vstrcfs:
2118 Opcode = SystemZISD::VSTRC_CC;
2119 CCValid = SystemZ::CCMASK_ANY;
2120 return true;
2121
2122 case Intrinsic::s390_vstrczbs:
2123 case Intrinsic::s390_vstrczhs:
2124 case Intrinsic::s390_vstrczfs:
2125 Opcode = SystemZISD::VSTRCZ_CC;
2126 CCValid = SystemZ::CCMASK_ANY;
2127 return true;
2128
2129 case Intrinsic::s390_vstrsb:
2130 case Intrinsic::s390_vstrsh:
2131 case Intrinsic::s390_vstrsf:
2132 Opcode = SystemZISD::VSTRS_CC;
2133 CCValid = SystemZ::CCMASK_ANY;
2134 return true;
2135
2136 case Intrinsic::s390_vstrszb:
2137 case Intrinsic::s390_vstrszh:
2138 case Intrinsic::s390_vstrszf:
2139 Opcode = SystemZISD::VSTRSZ_CC;
2140 CCValid = SystemZ::CCMASK_ANY;
2141 return true;
2142
2143 case Intrinsic::s390_vfcedbs:
2144 case Intrinsic::s390_vfcesbs:
2145 Opcode = SystemZISD::VFCMPES;
2146 CCValid = SystemZ::CCMASK_VCMP;
2147 return true;
2148
2149 case Intrinsic::s390_vfchdbs:
2150 case Intrinsic::s390_vfchsbs:
2151 Opcode = SystemZISD::VFCMPHS;
2152 CCValid = SystemZ::CCMASK_VCMP;
2153 return true;
2154
2155 case Intrinsic::s390_vfchedbs:
2156 case Intrinsic::s390_vfchesbs:
2157 Opcode = SystemZISD::VFCMPHES;
2158 CCValid = SystemZ::CCMASK_VCMP;
2159 return true;
2160
2161 case Intrinsic::s390_vftcidb:
2162 case Intrinsic::s390_vftcisb:
2163 Opcode = SystemZISD::VFTCI;
2164 CCValid = SystemZ::CCMASK_VCMP;
2165 return true;
2166
2167 case Intrinsic::s390_tdc:
2168 Opcode = SystemZISD::TDC;
2169 CCValid = SystemZ::CCMASK_TDC;
2170 return true;
2171
2172 default:
2173 return false;
2174 }
2175}
2176
2177// Emit an intrinsic with chain and an explicit CC register result.
2178static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2179 unsigned Opcode) {
2180 // Copy all operands except the intrinsic ID.
2181 unsigned NumOps = Op.getNumOperands();
2182 SmallVector<SDValue, 6> Ops;
2183 Ops.reserve(NumOps - 1);
2184 Ops.push_back(Op.getOperand(0));
2185 for (unsigned I = 2; I < NumOps; ++I)
2186 Ops.push_back(Op.getOperand(I));
2187
2188  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2189 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2190 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2191 SDValue OldChain = SDValue(Op.getNode(), 1);
2192 SDValue NewChain = SDValue(Intr.getNode(), 1);
2193 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2194 return Intr.getNode();
2195}
2196
2197// Emit an intrinsic with an explicit CC register result.
2198static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2199 unsigned Opcode) {
2200 // Copy all operands except the intrinsic ID.
2201 unsigned NumOps = Op.getNumOperands();
2202 SmallVector<SDValue, 6> Ops;
2203 Ops.reserve(NumOps - 1);
2204 for (unsigned I = 1; I < NumOps; ++I)
2205 Ops.push_back(Op.getOperand(I));
2206
2207 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2208 return Intr.getNode();
2209}
2210
2211// CC is a comparison that will be implemented using an integer or
2212// floating-point comparison. Return the condition code mask for
2213// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2214// unsigned comparisons and clear for signed ones. In the floating-point
2215// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
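// For example, the CONV macro below maps both ISD::SETGT and ISD::SETOGT to
// CCMASK_CMP_GT, while ISD::SETUGT maps to CCMASK_CMP_UO | CCMASK_CMP_GT.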
2216static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2217#define CONV(X) \
2218 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2219 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2220 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2221
2222 switch (CC) {
2223 default:
2224    llvm_unreachable("Invalid integer condition!");
2225
2226 CONV(EQ);
2227 CONV(NE);
2228 CONV(GT);
2229 CONV(GE);
2230 CONV(LT);
2231 CONV(LE);
2232
2233 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2234 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2235 }
2236#undef CONV
2237}
2238
2239// If C can be converted to a comparison against zero, adjust the operands
2240// as necessary.
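// For example, a signed test X > -1 is rewritten as X >= 0 and X < 1 as
// X <= 0, by XOR-ing CCMASK_CMP_EQ into the condition-code mask.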
2241static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2242 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2243 return;
2244
2245 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2246 if (!ConstOp1)
2247 return;
2248
2249 int64_t Value = ConstOp1->getSExtValue();
2250 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2251 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2252 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2253 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2254 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2255 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2256 }
2257}
2258
2259// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2260// adjust the operands as necessary.
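// For example, an equality test of a zero-extending i8 load against an
// in-range constant can then use CLI to compare the byte in memory directly.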
2261static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2262 Comparison &C) {
2263  // For us to make any changes, it must be a comparison between a single-use
2264 // load and a constant.
2265 if (!C.Op0.hasOneUse() ||
2266 C.Op0.getOpcode() != ISD::LOAD ||
2267 C.Op1.getOpcode() != ISD::Constant)
2268 return;
2269
2270 // We must have an 8- or 16-bit load.
2271 auto *Load = cast<LoadSDNode>(C.Op0);
2272 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2273 if ((NumBits != 8 && NumBits != 16) ||
2274 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2275 return;
2276
2277 // The load must be an extending one and the constant must be within the
2278 // range of the unextended value.
2279 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2280 uint64_t Value = ConstOp1->getZExtValue();
2281 uint64_t Mask = (1 << NumBits) - 1;
2282 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2283 // Make sure that ConstOp1 is in range of C.Op0.
2284 int64_t SignedValue = ConstOp1->getSExtValue();
2285 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2286 return;
2287 if (C.ICmpType != SystemZICMP::SignedOnly) {
2288 // Unsigned comparison between two sign-extended values is equivalent
2289 // to unsigned comparison between two zero-extended values.
2290 Value &= Mask;
2291 } else if (NumBits == 8) {
2292 // Try to treat the comparison as unsigned, so that we can use CLI.
2293 // Adjust CCMask and Value as necessary.
2294 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2295 // Test whether the high bit of the byte is set.
2296 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2297 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2298 // Test whether the high bit of the byte is clear.
2299 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2300 else
2301 // No instruction exists for this combination.
2302 return;
2303 C.ICmpType = SystemZICMP::UnsignedOnly;
2304 }
2305 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2306 if (Value > Mask)
2307 return;
2308 // If the constant is in range, we can use any comparison.
2309 C.ICmpType = SystemZICMP::Any;
2310 } else
2311 return;
2312
2313 // Make sure that the first operand is an i32 of the right extension type.
2314 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2315 ISD::SEXTLOAD :
2316 ISD::ZEXTLOAD);
2317 if (C.Op0.getValueType() != MVT::i32 ||
2318 Load->getExtensionType() != ExtType) {
2319 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2320 Load->getBasePtr(), Load->getPointerInfo(),
2321 Load->getMemoryVT(), Load->getAlign(),
2322 Load->getMemOperand()->getFlags());
2323 // Update the chain uses.
2324 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2325 }
2326
2327 // Make sure that the second operand is an i32 with the right value.
2328 if (C.Op1.getValueType() != MVT::i32 ||
2329 Value != ConstOp1->getZExtValue())
2330 C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2331}
2332
2333// Return true if Op is either an unextended load, or a load suitable
2334// for integer register-memory comparisons of type ICmpType.
2335static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2336 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2337 if (Load) {
2338 // There are no instructions to compare a register with a memory byte.
2339 if (Load->getMemoryVT() == MVT::i8)
2340 return false;
2341 // Otherwise decide on extension type.
2342 switch (Load->getExtensionType()) {
2343 case ISD::NON_EXTLOAD:
2344 return true;
2345 case ISD::SEXTLOAD:
2346 return ICmpType != SystemZICMP::UnsignedOnly;
2347 case ISD::ZEXTLOAD:
2348 return ICmpType != SystemZICMP::SignedOnly;
2349 default:
2350 break;
2351 }
2352 }
2353 return false;
2354}
2355
2356// Return true if it is better to swap the operands of C.
2357static bool shouldSwapCmpOperands(const Comparison &C) {
2358 // Leave f128 comparisons alone, since they have no memory forms.
2359 if (C.Op0.getValueType() == MVT::f128)
2360 return false;
2361
2362 // Always keep a floating-point constant second, since comparisons with
2363 // zero can use LOAD TEST and comparisons with other constants make a
2364 // natural memory operand.
2365 if (isa<ConstantFPSDNode>(C.Op1))
2366 return false;
2367
2368 // Never swap comparisons with zero since there are many ways to optimize
2369 // those later.
2370 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2371 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2372 return false;
2373
2374 // Also keep natural memory operands second if the loaded value is
2375 // only used here. Several comparisons have memory forms.
2376 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2377 return false;
2378
2379 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2380 // In that case we generally prefer the memory to be second.
2381 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2382 // The only exceptions are when the second operand is a constant and
2383 // we can use things like CHHSI.
2384 if (!ConstOp1)
2385 return true;
2386 // The unsigned memory-immediate instructions can handle 16-bit
2387 // unsigned integers.
2388 if (C.ICmpType != SystemZICMP::SignedOnly &&
2389 isUInt<16>(ConstOp1->getZExtValue()))
2390 return false;
2391 // The signed memory-immediate instructions can handle 16-bit
2392 // signed integers.
2393 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2394 isInt<16>(ConstOp1->getSExtValue()))
2395 return false;
2396 return true;
2397 }
2398
2399 // Try to promote the use of CGFR and CLGFR.
2400 unsigned Opcode0 = C.Op0.getOpcode();
2401 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2402 return true;
2403 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2404 return true;
2405 if (C.ICmpType != SystemZICMP::SignedOnly &&
2406 Opcode0 == ISD::AND &&
2407 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2408 cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
2409 return true;
2410
2411 return false;
2412}
2413
2414// Check whether C tests for equality between X and Y and whether X - Y
2415// or Y - X is also computed. In that case it's better to compare the
2416// result of the subtraction against zero.
2417static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2418 Comparison &C) {
2419 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2420 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2421 for (SDNode *N : C.Op0->uses()) {
2422 if (N->getOpcode() == ISD::SUB &&
2423 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2424 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2425 C.Op0 = SDValue(N, 0);
2426 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2427 return;
2428 }
2429 }
2430 }
2431}
2432
2433// Check whether C compares a floating-point value with zero and if that
2434// floating-point value is also negated. In this case we can use the
2435// negation to set CC, so avoiding separate LOAD AND TEST and
2436// LOAD (NEGATIVE/COMPLEMENT) instructions.
2437static void adjustForFNeg(Comparison &C) {
2438 // This optimization is invalid for strict comparisons, since FNEG
2439 // does not raise any exceptions.
2440 if (C.Chain)
2441 return;
2442 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2443 if (C1 && C1->isZero()) {
2444 for (SDNode *N : C.Op0->uses()) {
2445 if (N->getOpcode() == ISD::FNEG) {
2446 C.Op0 = SDValue(N, 0);
2447 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2448 return;
2449 }
2450 }
2451 }
2452}
2453
2454// Check whether C compares (shl X, 32) with 0 and whether X is
2455// also sign-extended. In that case it is better to test the result
2456// of the sign extension using LTGFR.
2457//
2458// This case is important because InstCombine transforms a comparison
2459// with (sext (trunc X)) into a comparison with (shl X, 32).
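// LTGFR (LOAD AND TEST, 32->64-bit sign extension) sets CC from the extended
// result, so testing an existing sign extension subsumes the shl-based compare.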
2460static void adjustForLTGFR(Comparison &C) {
2461 // Check for a comparison between (shl X, 32) and 0.
2462 if (C.Op0.getOpcode() == ISD::SHL &&
2463 C.Op0.getValueType() == MVT::i64 &&
2464 C.Op1.getOpcode() == ISD::Constant &&
2465 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2466 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2467 if (C1 && C1->getZExtValue() == 32) {
2468 SDValue ShlOp0 = C.Op0.getOperand(0);
2469 // See whether X has any SIGN_EXTEND_INREG uses.
2470 for (SDNode *N : ShlOp0->uses()) {
2471 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2472 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2473 C.Op0 = SDValue(N, 0);
2474 return;
2475 }
2476 }
2477 }
2478 }
2479}
2480
2481// If C compares the truncation of an extending load, try to compare
2482// the untruncated value instead. This exposes more opportunities to
2483// reuse CC.
2484static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2485 Comparison &C) {
2486 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2487 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2488 C.Op1.getOpcode() == ISD::Constant &&
2489 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2490 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2491 if (L->getMemoryVT().getStoreSizeInBits().getFixedSize() <=
2492 C.Op0.getValueSizeInBits().getFixedSize()) {
2493 unsigned Type = L->getExtensionType();
2494 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2495 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2496 C.Op0 = C.Op0.getOperand(0);
2497 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2498 }
2499 }
2500 }
2501}
2502
2503// Return true if shift operation N has an in-range constant shift value.
2504// Store it in ShiftVal if so.
2505static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2506 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2507 if (!Shift)
2508 return false;
2509
2510 uint64_t Amount = Shift->getZExtValue();
2511 if (Amount >= N.getValueSizeInBits())
2512 return false;
2513
2514 ShiftVal = Amount;
2515 return true;
2516}
2517
2518// Check whether an AND with Mask is suitable for a TEST UNDER MASK
2519// instruction and whether the CC value is descriptive enough to handle
2520// a comparison of type Opcode between the AND result and CmpVal.
2521// CCMask says which comparison result is being tested and BitSize is
2522// the number of bits in the operands. If TEST UNDER MASK can be used,
2523// return the corresponding CC mask, otherwise return 0.
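// For example, (X & 0xff00) == 0 fits TMLL with mask 0xff00, and the returned
// CCMASK_TM_ALL_0 selects "all tested bits are zero".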
2524static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2525 uint64_t Mask, uint64_t CmpVal,
2526 unsigned ICmpType) {
2527  assert(Mask != 0 && "ANDs with zero should have been removed by now");
2528
2529 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2530 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2531 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2532 return 0;
2533
2534 // Work out the masks for the lowest and highest bits.
2535 unsigned HighShift = 63 - countLeadingZeros(Mask);
2536 uint64_t High = uint64_t(1) << HighShift;
2537 uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2538
2539 // Signed ordered comparisons are effectively unsigned if the sign
2540 // bit is dropped.
2541 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2542
2543 // Check for equality comparisons with 0, or the equivalent.
2544 if (CmpVal == 0) {
2545 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2546 return SystemZ::CCMASK_TM_ALL_0;
2547 if (CCMask == SystemZ::CCMASK_CMP_NE)
2548 return SystemZ::CCMASK_TM_SOME_1;
2549 }
2550 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2551 if (CCMask == SystemZ::CCMASK_CMP_LT)
2552 return SystemZ::CCMASK_TM_ALL_0;
2553 if (CCMask == SystemZ::CCMASK_CMP_GE)
2554 return SystemZ::CCMASK_TM_SOME_1;
2555 }
2556 if (EffectivelyUnsigned && CmpVal < Low) {
2557 if (CCMask == SystemZ::CCMASK_CMP_LE)
2558 return SystemZ::CCMASK_TM_ALL_0;
2559 if (CCMask == SystemZ::CCMASK_CMP_GT)
2560 return SystemZ::CCMASK_TM_SOME_1;
2561 }
2562
2563 // Check for equality comparisons with the mask, or the equivalent.
2564 if (CmpVal == Mask) {
2565 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2566 return SystemZ::CCMASK_TM_ALL_1;
2567 if (CCMask == SystemZ::CCMASK_CMP_NE)
2568 return SystemZ::CCMASK_TM_SOME_0;
2569 }
2570 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2571 if (CCMask == SystemZ::CCMASK_CMP_GT)
2572 return SystemZ::CCMASK_TM_ALL_1;
2573 if (CCMask == SystemZ::CCMASK_CMP_LE)
2574 return SystemZ::CCMASK_TM_SOME_0;
2575 }
2576 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2577 if (CCMask == SystemZ::CCMASK_CMP_GE)
2578 return SystemZ::CCMASK_TM_ALL_1;
2579 if (CCMask == SystemZ::CCMASK_CMP_LT)
2580 return SystemZ::CCMASK_TM_SOME_0;
2581 }
2582
2583 // Check for ordered comparisons with the top bit.
2584 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2585 if (CCMask == SystemZ::CCMASK_CMP_LE)
2586 return SystemZ::CCMASK_TM_MSB_0;
2587 if (CCMask == SystemZ::CCMASK_CMP_GT)
2588 return SystemZ::CCMASK_TM_MSB_1;
2589 }
2590 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2591 if (CCMask == SystemZ::CCMASK_CMP_LT)
2592 return SystemZ::CCMASK_TM_MSB_0;
2593 if (CCMask == SystemZ::CCMASK_CMP_GE)
2594 return SystemZ::CCMASK_TM_MSB_1;
2595 }
2596
2597 // If there are just two bits, we can do equality checks for Low and High
2598 // as well.
2599 if (Mask == Low + High) {
2600 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2601 return SystemZ::CCMASK_TM_MIXED_MSB_0;
2602 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2603 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2604 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2605 return SystemZ::CCMASK_TM_MIXED_MSB_1;
2606 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2607 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2608 }
2609
2610 // Looks like we've exhausted our options.
2611 return 0;
2612}
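// Annotation, not part of the original file: a minimal sketch of the
// CmpVal == 0 case above. A TEST UNDER MASK of X against Mask can answer
// (X & Mask) == 0 directly via the CCMASK_TM_ALL_0 condition; the helper
// name below is hypothetical.
static inline bool allSelectedBitsZero(uint64_t X, uint64_t Mask) {
  // TM sets CC 0 ("all selected bits zero") exactly when this holds.
  return (X & Mask) == 0;
}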
2613
2614// See whether C can be implemented as a TEST UNDER MASK instruction.
2615// Update the arguments with the TM version if so.
2616static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2617 Comparison &C) {
2618 // Check that we have a comparison with a constant.
2619 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2620 if (!ConstOp1)
2621 return;
2622 uint64_t CmpVal = ConstOp1->getZExtValue();
2623
2624 // Check whether the nonconstant input is an AND with a constant mask.
2625 Comparison NewC(C);
2626 uint64_t MaskVal;
2627 ConstantSDNode *Mask = nullptr;
2628 if (C.Op0.getOpcode() == ISD::AND) {
2629 NewC.Op0 = C.Op0.getOperand(0);
2630 NewC.Op1 = C.Op0.getOperand(1);
2631 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2632 if (!Mask)
2633 return;
2634 MaskVal = Mask->getZExtValue();
2635 } else {
2636 // There is no instruction to compare with a 64-bit immediate,
2637 // so use TMHH instead if possible. We need an unsigned ordered
2638 // comparison with an i64 immediate.
2639 if (NewC.Op0.getValueType() != MVT::i64 ||
2640 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2641 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2642 NewC.ICmpType == SystemZICMP::SignedOnly)
2643 return;
2644 // Convert LE and GT comparisons into LT and GE.
2645 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2646 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2647 if (CmpVal == uint64_t(-1))
2648 return;
2649 CmpVal += 1;
2650 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2651 }
2652 // If the low N bits of Op1 are zero, then the low N bits of Op0 can
2653 // be masked off without changing the result.
2654 MaskVal = -(CmpVal & -CmpVal);
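// Annotation, not part of the original source: CmpVal & -CmpVal isolates
// the lowest set bit of CmpVal, and negating that yields a mask of that
// bit and everything above it. E.g. for CmpVal = 0x1800, CmpVal & -CmpVal
// == 0x0800, so MaskVal == 0xfffffffffffff800, masking off the low 11
// bits, which cannot affect the (now unsigned ordered) comparison.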
2655 NewC.ICmpType = SystemZICMP::UnsignedOnly;
2656 }
2657 if (!MaskVal)
2658 return;
2659
2660 // Check whether the combination of mask, comparison value and comparison
2661 // type are suitable.
2662 unsigned BitSize = NewC.Op0.getValueSizeInBits();
2663 unsigned NewCCMask, ShiftVal;
2664 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2665 NewC.Op0.getOpcode() == ISD::SHL &&
2666 isSimpleShift(NewC.Op0, ShiftVal) &&
2667 (MaskVal >> ShiftVal != 0) &&
2668 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2669 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2670 MaskVal >> ShiftVal,
2671 CmpVal >> ShiftVal,
2672 SystemZICMP::Any))) {
2673 NewC.Op0 = NewC.Op0.getOperand(0);
2674 MaskVal >>= ShiftVal;
2675 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2676 NewC.Op0.getOpcode() == ISD::SRL &&
2677 isSimpleShift(NewC.Op0, ShiftVal) &&
2678 (MaskVal << ShiftVal != 0) &&
2679 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2680 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2681 MaskVal << ShiftVal,
2682 CmpVal << ShiftVal,
2683 SystemZICMP::UnsignedOnly))) {
2684 NewC.Op0 = NewC.Op0.getOperand(0);
2685 MaskVal <<= ShiftVal;
2686 } else {
2687 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2688 NewC.ICmpType);
2689 if (!NewCCMask)
2690 return;
2691 }
2692
2693 // Go ahead and make the change.
2694 C.Opcode = SystemZISD::TM;
2695 C.Op0 = NewC.Op0;
2696 if (Mask && Mask->getZExtValue() == MaskVal)
2697 C.Op1 = SDValue(Mask, 0);
2698 else
2699 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2700 C.CCValid = SystemZ::CCMASK_TM;
2701 C.CCMask = NewCCMask;
2702}
2703
2704// See whether the comparison argument contains a redundant AND
2705// and remove it if so. This sometimes happens due to the generic
2706// BRCOND expansion.
2707static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2708 Comparison &C) {
2709 if (C.Op0.getOpcode() != ISD::AND)
2710 return;
2711 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2712 if (!Mask)
2713 return;
2714 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
2715 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2716 return;
2717
2718 C.Op0 = C.Op0.getOperand(0);
2719}
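// Annotation, not part of the original source: in the check above,
// ~Known.Zero is the set of bits that may still be nonzero. If every such
// bit lies inside the mask (the AND is 0), the AND cannot clear anything
// and is redundant; e.g. an i32 value zero-extended to i64 and then
// ANDed with 0xffffffff.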
2720
2721// Return a Comparison that tests the condition-code result of intrinsic
2722// node Call against constant integer CC using comparison code Cond.
2723// Opcode is the opcode of the SystemZISD operation for the intrinsic
2724// and CCValid is the set of possible condition-code results.
2725static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2726 SDValue Call, unsigned CCValid, uint64_t CC,
2727 ISD::CondCode Cond) {
2728 Comparison C(Call, SDValue(), SDValue());
2729 C.Opcode = Opcode;
2730 C.CCValid = CCValid;
2731 if (Cond == ISD::SETEQ)
2732 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2733 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2734 else if (Cond == ISD::SETNE)
2735 // ...and the inverse of that.
2736 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2737 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2738 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2739 // always true for CC>3.
2740 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2741 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2742 // ...and the inverse of that.
2743 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2744 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2745 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2746 // always true for CC>3.
2747 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2748 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2749 // ...and the inverse of that.
2750 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2751 else
2752 llvm_unreachable("Unexpected integer comparison type");
2753 C.CCMask &= CCValid;
2754 return C;
2755}
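// Annotation, not part of the original file: a standalone sketch of the
// SETEQ / SETLT mask construction above, using the same 4-bit CC layout
// (bit 3 = CC 0, ..., bit 0 = CC 3). The helper names are hypothetical.
static inline unsigned ccMaskForEq(uint64_t CC) {
  return CC < 4 ? 1u << (3 - CC) : 0; // matches the SETEQ case above
}
static inline unsigned ccMaskForLt(uint64_t CC) {
  return CC < 4 ? ~0u << (4 - CC) : ~0u; // matches the SETLT/SETULT case
}
// E.g. ccMaskForEq(1) == 0b0100, and ccMaskForLt(2) has low bits 0b1100,
// selecting CC values 0 and 1; the caller then ANDs with CCValid.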
2756
2757 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2758static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2759 ISD::CondCode Cond, const SDLoc &DL,
2760 SDValue Chain = SDValue(),
2761 bool IsSignaling = false) {
2762 if (CmpOp1.getOpcode() == ISD::Constant) {
2763 assert(!Chain);
2764 uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2765 unsigned Opcode, CCValid;
2766 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2767 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2768 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2769 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2770 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2771 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2772 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2773 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2774 }
2775 Comparison C(CmpOp0, CmpOp1, Chain);
2776 C.CCMask = CCMaskForCondCode(Cond);
2777 if (C.Op0.getValueType().isFloatingPoint()) {
2778 C.CCValid = SystemZ::CCMASK_FCMP;
2779 if (!C.Chain)
2780 C.Opcode = SystemZISD::FCMP;
2781 else if (!IsSignaling)
2782 C.Opcode = SystemZISD::STRICT_FCMP;
2783 else
2784 C.Opcode = SystemZISD::STRICT_FCMPS;
2785 adjustForFNeg(C);
2786 } else {
2787 assert(!C.Chain);
2788 C.CCValid = SystemZ::CCMASK_ICMP;
2789 C.Opcode = SystemZISD::ICMP;
2790 // Choose the type of comparison. Equality and inequality tests can
2791 // use either signed or unsigned comparisons. The choice also doesn't
2792 // matter if both sign bits are known to be clear. In those cases we
2793 // want to give the main isel code the freedom to choose whichever
2794 // form fits best.
2795 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2796 C.CCMask == SystemZ::CCMASK_CMP_NE ||
2797 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2798 C.ICmpType = SystemZICMP::Any;
2799 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2800 C.ICmpType = SystemZICMP::UnsignedOnly;
2801 else
2802 C.ICmpType = SystemZICMP::SignedOnly;
2803 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2804 adjustForRedundantAnd(DAG, DL, C);
2805 adjustZeroCmp(DAG, DL, C);
2806 adjustSubwordCmp(DAG, DL, C);
2807 adjustForSubtraction(DAG, DL, C);
2808 adjustForLTGFR(C);
2809 adjustICmpTruncate(DAG, DL, C);
2810 }
2811
2812 if (shouldSwapCmpOperands(C)) {
2813 std::swap(C.Op0, C.Op1);
2814 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2815 }
2816
2817 adjustForTestUnderMask(DAG, DL, C);
2818 return C;
2819}
2820
2821// Emit the comparison instruction described by C.
2822static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2823 if (!C.Op1.getNode()) {
2824 SDNode *Node;
2825 switch (C.Op0.getOpcode()) {
2826 case ISD::INTRINSIC_W_CHAIN:
2827 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2828 return SDValue(Node, 0);
2829 case ISD::INTRINSIC_WO_CHAIN:
2830 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2831 return SDValue(Node, Node->getNumValues() - 1);
2832 default:
2833 llvm_unreachable("Invalid comparison operands");
2834 }
2835 }
2836 if (C.Opcode == SystemZISD::ICMP)
2837 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
2838 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
2839 if (C.Opcode == SystemZISD::TM) {
2840 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2841 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2842 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
2843 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
2844 }
2845 if (C.Chain) {
2846 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
2847 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
2848 }
2849 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
2850}
2851
2852// Implement a 32-bit *MUL_LOHI operation by extending both operands to
2853// 64 bits. Extend is the extension type to use. Store the high part
2854// in Hi and the low part in Lo.
2855static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2856 SDValue Op0, SDValue Op1, SDValue &Hi,
2857 SDValue &Lo) {
2858 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2859 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2860 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2861 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2862 DAG.getConstant(32, DL, MVT::i64));
2863 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2864 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2865}
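// Annotation, not part of the original file: a self-contained sketch of
// the widening trick above for the ZERO_EXTEND case, using plain integers
// (assumed <cstdint> types) instead of SelectionDAG nodes.
static inline void umulLoHi32Sketch(uint32_t A, uint32_t B,
                                    uint32_t &Hi, uint32_t &Lo) {
  uint64_t Prod = uint64_t(A) * uint64_t(B); // ZERO_EXTEND + 64-bit MUL
  Hi = uint32_t(Prod >> 32);                 // SRL by 32, then TRUNCATE
  Lo = uint32_t(Prod);                       // TRUNCATE
}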
2866
2867// Lower a binary operation that produces two VT results, one in each
2868// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2869// and Opcode performs the GR128 operation. Store the even register result
2870// in Even and the odd register result in Odd.
2871static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2872 unsigned Opcode, SDValue Op0, SDValue Op1,
2873 SDValue &Even, SDValue &Odd) {
2874 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
2875 bool Is32Bit = is32Bit(VT);
2876 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2877 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2878}
2879
2880// Return an i32 value that is 1 if the CC value produced by CCReg is
2881// in the mask CCMask and 0 otherwise. CC is known to have a value
2882// in CCValid, so other values can be ignored.
2883static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
2884 unsigned CCValid, unsigned CCMask) {
2885 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
2886 DAG.getConstant(0, DL, MVT::i32),
2887 DAG.getTargetConstant(CCValid, DL, MVT::i32),
2888 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
2889 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
2890}
2891
2892 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2893// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
2894// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
2895// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
2896// floating-point comparisons.
2897enum class CmpMode { Int, FP, StrictFP, SignalingFP };
2898static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
2899 switch (CC) {
2900 case ISD::SETOEQ:
2901 case ISD::SETEQ:
2902 switch (Mode) {
2903 case CmpMode::Int: return SystemZISD::VICMPE;
2904 case CmpMode::FP: return SystemZISD::VFCMPE;
2905 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
2906 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
2907 }
2908 llvm_unreachable("Bad mode");
2909
2910 case ISD::SETOGE:
2911 case ISD::SETGE:
2912 switch (Mode) {
2913 case CmpMode::Int: return 0;
2914 case CmpMode::FP: return SystemZISD::VFCMPHE;
2915 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
2916 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
2917 }
2918 llvm_unreachable("Bad mode");
2919
2920 case ISD::SETOGT:
2921 case ISD::SETGT:
2922 switch (Mode) {
2923 case CmpMode::Int: return SystemZISD::VICMPH;
2924 case CmpMode::FP: return SystemZISD::VFCMPH;
2925 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
2926 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
2927 }
2928 llvm_unreachable("Bad mode");
2929
2930 case ISD::SETUGT:
2931 switch (Mode) {
2932 case CmpMode::Int: return SystemZISD::VICMPHL;
2933 case CmpMode::FP: return 0;
2934 case CmpMode::StrictFP: return 0;
2935 case CmpMode::SignalingFP: return 0;
2936 }
2937 llvm_unreachable("Bad mode");
2938
2939 default:
2940 return 0;
2941 }
2942}
2943
2944// Return the SystemZISD vector comparison operation for CC or its inverse,
2945// or 0 if neither can be done directly. Indicate in Invert whether the
2946// result is for the inverse of CC. Mode is as above.
2947static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
2948 bool &Invert) {
2949 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2950 Invert = false;
2951 return Opcode;
2952 }
2953
2954 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
2955 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2956 Invert = true;
2957 return Opcode;
2958 }
2959
2960 return 0;
2961}
2962
2963// Return a v2f64 that contains the extended form of elements Start and Start+1
2964// of v4f32 value Op. If Chain is nonnull, return the strict form.
2965static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2966 SDValue Op, SDValue Chain) {
2967 int Mask[] = { Start, -1, Start + 1, -1 };
2968 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2969 if (Chain) {
2970 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
2971 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
2972 }
2973 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2974}
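// Annotation, not part of the original source: for Start == 0 the shuffle
// mask { 0, -1, 1, -1 } places elements 0 and 1 of Op in lanes 0 and 2,
// with don't-care values in between, consistent with VEXTEND widening the
// even-numbered f32 lanes into the two f64 lanes of the result.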
2975
2976// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2977// producing a result of type VT. If Chain is nonnull, return the strict form.
2978SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2979 const SDLoc &DL, EVT VT,
2980 SDValue CmpOp0,
2981 SDValue CmpOp1,
2982 SDValue Chain) const {
2983 // There is no hardware support for v4f32 (unless we have the vector
2984 // enhancements facility 1), so extend the vector into two v2f64s
2985 // and compare those.
2986 if (CmpOp0.getValueType() == MVT::v4f32 &&
2987 !Subtarget.hasVectorEnhancements1()) {
2988 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
2989 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
2990 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
2991 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
2992 if (Chain) {
2993 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
2994 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
2995 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
2996 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2997 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
2998 H1.getValue(1), L1.getValue(1),
2999 HRes.getValue(1), LRes.getValue(1) };
3000 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3001 SDValue Ops[2] = { Res, NewChain };
3002 return DAG.getMergeValues(Ops, DL);
3003 }
3004 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3005 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3006 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3007 }
3008 if (Chain) {
3009 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3010 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3011 }
3012 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3013}
3014
3015// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3016// an integer mask of type VT. If Chain is nonnull, we have a strict
3017// floating-point comparison. If in addition IsSignaling is true, we have
3018// a strict signaling floating-point comparison.
3019SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3020 const SDLoc &DL, EVT VT,
3021 ISD::CondCode CC,
3022 SDValue CmpOp0,
3023 SDValue CmpOp1,
3024 SDValue Chain,
3025 bool IsSignaling) const {
3026 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3027 assert(!Chain || IsFP);
3028 assert(!IsSignaling || Chain);
3029 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3030 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3031 bool Invert = false;
3032 SDValue Cmp;
3033 switch (CC) {
3034 // Handle tests for order using (or (ogt y x) (oge x y)).
3035 case ISD::SETUO:
3036 Invert = true;
3037 [[fallthrough]];
3038 case ISD::SETO: {
3039 assert(IsFP && "Unexpected integer comparison");
3040 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3041 DL, VT, CmpOp1, CmpOp0, Chain);
3042 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3043 DL, VT, CmpOp0, CmpOp1, Chain);
3044 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3045 if (Chain)
3046 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3047 LT.getValue(1), GE.getValue(1));
3048 break;
3049 }
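// Annotation, not part of the original source: (ogt y x) | (oge x y)
// covers every ordered pair, since one of the two holds whenever neither
// operand is a NaN, and both comparisons are false when either operand is
// a NaN; so the OR computes "ordered" and its inversion computes SETUO.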
3050
3051 // Handle <> tests using (or (ogt y x) (ogt x y)).
3052 case ISD::SETUEQ:
3053 Invert = true;
3054 [[fallthrough]];
3055 case ISD::SETONE: {
3056 assert(IsFP && "Unexpected integer comparison");
3057 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3058 DL, VT, CmpOp1, CmpOp0, Chain);
3059 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3060 DL, VT, CmpOp0, CmpOp1, Chain);
3061 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3062 if (Chain)
3063 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3064 LT.getValue(1), GT.getValue(1));
3065 break;
3066 }
3067
3068 // Otherwise a single comparison is enough. It doesn't really
3069 // matter whether we try the inversion or the swap first, since
3070 // there are no cases where both work.
3071 default:
3072 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3073 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3074 else {
3075 CC = ISD::getSetCCSwappedOperands(CC);
3076 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3077 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3078 else
3079 llvm_unreachable("Unhandled comparison");
3080 }
3081 if (Chain)
3082 Chain = Cmp.getValue(1);
3083 break;
3084 }
3085 if (Invert) {
3086 SDValue Mask =
3087 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3088 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3089 }
3090 if (Chain && Chain.getNode() != Cmp.getNode()) {
3091 SDValue Ops[2] = { Cmp, Chain };
3092 Cmp = DAG.getMergeValues(Ops, DL);
3093 }
3094 return Cmp;
3095}
3096
3097SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3098 SelectionDAG &DAG) const {
3099 SDValue CmpOp0 = Op.getOperand(0);
3100 SDValue CmpOp1 = Op.getOperand(1);
3101 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3102 SDLoc DL(Op);
3103 EVT VT = Op.getValueType();
3104 if (VT.isVector())
3105 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3106
3107 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3108 SDValue CCReg = emitCmp(DAG, DL, C);
3109 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3110}
3111
3112SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3113 SelectionDAG &DAG,
3114 bool IsSignaling) const {
3115 SDValue Chain = Op.getOperand(0);
3116 SDValue CmpOp0 = Op.getOperand(1);
3117 SDValue CmpOp1 = Op.getOperand(2);
3118 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3119 SDLoc DL(Op);
3120 EVT VT = Op.getNode()->getValueType(0);
3121 if (VT.isVector()) {
3122 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3123 Chain, IsSignaling);
3124 return Res.getValue(Op.getResNo());
3125 }
3126
3127 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3128 SDValue CCReg = emitCmp(DAG, DL, C);
3129 CCReg->setFlags(Op->getFlags());
3130 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3131 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3132 return DAG.getMergeValues(Ops, DL);
3133}
3134
3135SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3136 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3137 SDValue CmpOp0 = Op.getOperand(2);
3138 SDValue CmpOp1 = Op.getOperand(3);
3139 SDValue Dest = Op.getOperand(4);
3140 SDLoc DL(Op);
3141
3142 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3143 SDValue CCReg = emitCmp(DAG, DL, C);
3144 return DAG.getNode(
3145 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3146 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3147 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3148}
3149
3150// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3151// allowing Pos and Neg to be wider than CmpOp.
3152static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3153 return (Neg.getOpcode() == ISD::SUB &&
3154 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3155 cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
3156 Neg.getOperand(1) == Pos &&
3157 (Pos == CmpOp ||
3158 (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3159 Pos.getOperand(0) == CmpOp)));
3160}
3161
3162// Return the absolute or negative absolute of Op; IsNegative decides which.
3163static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3164 bool IsNegative) {
3165 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3166 if (IsNegative)
3167 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3168 DAG.getConstant(0, DL, Op.getValueType()), Op);
3169 return Op;
3170}
3171
3172SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3173 SelectionDAG &DAG) const {
3174 SDValue CmpOp0 = Op.getOperand(0);
3175 SDValue CmpOp1 = Op.getOperand(1);
3176 SDValue TrueOp = Op.getOperand(2);
3177 SDValue FalseOp = Op.getOperand(3);
3178 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3179 SDLoc DL(Op);
3180
3181 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3182
3183 // Check for absolute and negative-absolute selections, including those
3184 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3185 // This check supplements the one in DAGCombiner.
3186 if (C.Opcode == SystemZISD::ICMP &&
3187 C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3188 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3189 C.Op1.getOpcode() == ISD::Constant &&
3190 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
3191 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3192 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3193 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3194 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3195 }
3196
3197 SDValue CCReg = emitCmp(DAG, DL, C);
3198 SDValue Ops[] = {TrueOp, FalseOp,
3199 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3200 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3201
3202 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3203}
3204
3205SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3206 SelectionDAG &DAG) const {
3207 SDLoc DL(Node);
3208 const GlobalValue *GV = Node->getGlobal();
3209 int64_t Offset = Node->getOffset();
3210 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3211 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3212
3213 SDValue Result;
3214 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3215 if (isInt<32>(Offset)) {
3216 // Assign anchors at 1<<12 byte boundaries.
3217 uint64_t Anchor = Offset & ~uint64_t(0xfff);
3218 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3219 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3220
3221 // The offset can be folded into the address if it is aligned to a
3222 // halfword.
3223 Offset -= Anchor;
3224 if (Offset != 0 && (Offset & 1) == 0) {
3225 SDValue Full =
3226 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3227 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3228 Offset = 0;
3229 }
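// Annotation, not part of the original source: for example, with
// Offset = 0x12346, Anchor = 0x12000 and the remaining 0x346 is even
// (halfword-aligned), so it folds into a single anchored address; an odd
// remainder such as 0x345 would instead fall through to the explicit ADD
// emitted at the end of this function.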
3230 } else {
3231 // Conservatively load a constant offset that does not fit in 32 bits
3232 // into a register below.
3233 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3234 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3235 }
3236 } else {
3237 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3238 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3239 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3240 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3241 }
3242
3243 // If there was a non-zero offset that we didn't fold, create an explicit
3244 // addition for it.
3245 if (Offset != 0)
3246 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3247 DAG.getConstant(Offset, DL, PtrVT));
3248
3249 return Result;
3250}
3251
3252SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3253 SelectionDAG &DAG,
3254 unsigned Opcode,
3255 SDValue GOTOffset) const {
3256 SDLoc DL(Node);
3257 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3258 SDValue Chain = DAG.getEntryNode();
3259 SDValue Glue;
3260
3261 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3262 CallingConv::GHC)
3263 report_fatal_error("In GHC calling convention TLS is not supported");
3264
3265 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3266 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3267 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3268 Glue = Chain.getValue(1);
3269 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3270 Glue = Chain.getValue(1);
3271
3272 // The first call operand is the chain and the second is the TLS symbol.
3273 SmallVector<SDValue, 8> Ops;
3274 Ops.push_back(Chain);
3275 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3276 Node->getValueType(0),
3277 0, 0));
3278
3279 // Add argument registers to the end of the list so that they are
3280 // known live into the call.
3281 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3282 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3283
3284 // Add a register mask operand representing the call-preserved registers.
3285 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3286 const uint32_t *Mask =
3287 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3288 assert(Mask && "Missing call preserved mask for calling convention");
3289 Ops.push_back(DAG.getRegisterMask(Mask));
3290
3291 // Glue the call to the argument copies.
3292 Ops.push_back(Glue);
3293
3294 // Emit the call.
3295 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3296 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3297 Glue = Chain.getValue(1);
3298
3299 // Copy the return value from %r2.
3300 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3301}
3302
3303SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3304 SelectionDAG &DAG) const {
3305 SDValue Chain = DAG.getEntryNode();
3306 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3307
3308 // The high part of the thread pointer is in access register 0.
3309 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3310 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3311
3312 // The low part of the thread pointer is in access register 1.
3313 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3314 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3315
3316 // Merge them into a single 64-bit address.
3317 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3318 DAG.getConstant(32, DL, PtrVT));
3319 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3320}
3321
3322SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3323 SelectionDAG &DAG) const {
3324 if (DAG.getTarget().useEmulatedTLS())
3325 return LowerToTLSEmulatedModel(Node, DAG);
3326 SDLoc DL(Node);
3327 const GlobalValue *GV = Node->getGlobal();
3328 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3329 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3330
3331 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3332 CallingConv::GHC)
3333 report_fatal_error("In GHC calling convention TLS is not supported");
3334
3335 SDValue TP = lowerThreadPointer(DL, DAG);
3336
3337 // Get the offset of GA from the thread pointer, based on the TLS model.
3338 SDValue Offset;
3339 switch (model) {
3340 case TLSModel::GeneralDynamic: {
3341 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3342 SystemZConstantPoolValue *CPV =
3343 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3344
3345 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3346 Offset = DAG.getLoad(
3347 PtrVT, DL, DAG.getEntryNode(), Offset,
3348 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3349
3350 // Call __tls_get_offset to retrieve the offset.
3351 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3352 break;
3353 }
3354
3355 case TLSModel::LocalDynamic: {
3356 // Load the GOT offset of the module ID.
3357 SystemZConstantPoolValue *CPV =
3358 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3359
3360 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3361 Offset = DAG.getLoad(
3362 PtrVT, DL, DAG.getEntryNode(), Offset,
3363 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3364
3365 // Call __tls_get_offset to retrieve the module base offset.
3366 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3367
3368 // Note: The SystemZLDCleanupPass will remove redundant computations
3369 // of the module base offset. Count total number of local-dynamic
3370 // accesses to trigger execution of that pass.
3371 SystemZMachineFunctionInfo* MFI =
3372 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3373 MFI->incNumLocalDynamicTLSAccesses();
3374
3375 // Add the per-symbol offset.
3376 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3377
3378 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3379 DTPOffset = DAG.getLoad(
3380 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3381 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3382
3383 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3384 break;
3385 }
3386
3387 case TLSModel::InitialExec: {
3388 // Load the offset from the GOT.
3389 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3390 SystemZII::MO_INDNTPOFF);
3391 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3392 Offset =
3393 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3394 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3395 break;
3396 }
3397
3398 case TLSModel::LocalExec: {
3399 // Force the offset into the constant pool and load it from there.
3400 SystemZConstantPoolValue *CPV =
3401 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3402
3403 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3404 Offset = DAG.getLoad(
3405 PtrVT, DL, DAG.getEntryNode(), Offset,
3406 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3407 break;
3408 }
3409 }
3410
3411 // Add the base and offset together.
3412 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3413}
3414
3415SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3416 SelectionDAG &DAG) const {
3417 SDLoc DL(Node);
3418 const BlockAddress *BA = Node->getBlockAddress();
3419 int64_t Offset = Node->getOffset();
3420 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3421
3422 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3423 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3424 return Result;
3425}
3426
3427SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3428 SelectionDAG &DAG) const {
3429 SDLoc DL(JT);
3430 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3431 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3432
3433 // Use LARL to load the address of the table.
3434 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3435}
3436
3437SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3438 SelectionDAG &DAG) const {
3439 SDLoc DL(CP);
3440 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3441
3442 SDValue Result;
3443 if (CP->isMachineConstantPoolEntry())
3444 Result =
3445 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3446 else
3447 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3448 CP->getOffset());
3449
3450 // Use LARL to load the address of the constant pool entry.
3451 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3452}
3453
3454SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3455 SelectionDAG &DAG) const {
3456 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
3457 MachineFunction &MF = DAG.getMachineFunction();
3458 MachineFrameInfo &MFI = MF.getFrameInfo();
3459 MFI.setFrameAddressIsTaken(true);
3460
3461 SDLoc DL(Op);
3462 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3463 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3464
3465 // By definition, the frame address is the address of the back chain. (In
3466 // the case of a packed stack without a backchain, return the address where
3467 // the backchain would have been stored; this will either be unused space
3468 // or contain a saved register.)
3469 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3470 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3471
3472 // FIXME: The frontend should detect this case.
3473 if (Depth > 0) {
3474 report_fatal_error("Unsupported stack frame traversal count");
3475 }
3476
3477 return BackChain;
3478}
3479
3480SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3481 SelectionDAG &DAG) const {
3482 MachineFunction &MF = DAG.getMachineFunction();
3483 MachineFrameInfo &MFI = MF.getFrameInfo();
3484 MFI.setReturnAddressIsTaken(true);
3485
3486 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3487 return SDValue();
3488
3489 SDLoc DL(Op);
3490 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3491 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3492
3493 // FIXME: The frontend should detect this case.
3494 if (Depth > 0) {
3495 report_fatal_error("Unsupported stack frame traversal count");
3496 }
3497
3498 // Return R14D, which has the return address. Mark it an implicit live-in.
3499 Register LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3500 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3501}
3502
3503SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3504 SelectionDAG &DAG) const {
3505 SDLoc DL(Op);
3506 SDValue In = Op.getOperand(0);
3507 EVT InVT = In.getValueType();
3508 EVT ResVT = Op.getValueType();
3509
3510 // Convert loads directly. This is normally done by DAGCombiner,
3511 // but we need this case for bitcasts that are created during lowering
3512 // and which are then lowered themselves.
3513 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3514 if (ISD::isNormalLoad(LoadN)) {
3515 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3516 LoadN->getBasePtr(), LoadN->getMemOperand());
3517 // Update the chain uses.
3518 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3519 return NewLoad;
3520 }
3521
3522 if (InVT == MVT::i32 && ResVT == MVT::f32) {
3523 SDValue In64;
3524 if (Subtarget.hasHighWord()) {
3525 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3526 MVT::i64);
3527 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3528 MVT::i64, SDValue(U64, 0), In);
3529 } else {
3530 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3531 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3532 DAG.getConstant(32, DL, MVT::i64));
3533 }
3534 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3535 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3536 DL, MVT::f32, Out64);
3537 }
3538 if (InVT == MVT::f32 && ResVT == MVT::i32) {
3539 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3540 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3541 MVT::f64, SDValue(U64, 0), In);
3542 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3543 if (Subtarget.hasHighWord())
3544 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3545 MVT::i32, Out64);
3546 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3547 DAG.getConstant(32, DL, MVT::i64));
3548 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3549 }
3550 llvm_unreachable("Unexpected bitcast combination");
3551}
3552
3553SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3554 SelectionDAG &DAG) const {
3555
3556 if (Subtarget.isTargetXPLINK64())
3557 return lowerVASTART_XPLINK(Op, DAG);
3558 else
3559 return lowerVASTART_ELF(Op, DAG);
3560}
3561
3562SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
3563 SelectionDAG &DAG) const {
3564 MachineFunction &MF = DAG.getMachineFunction();
3565 SystemZMachineFunctionInfo *FuncInfo =
3566 MF.getInfo<SystemZMachineFunctionInfo>();
3567
3568 SDLoc DL(Op);
3569
3570 // vastart just stores the address of the VarArgsFrameIndex slot into the
3571 // memory location argument.
3572 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3573 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3574 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3575 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
3576 MachinePointerInfo(SV));
3577}
3578
3579SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
3580 SelectionDAG &DAG) const {
3581 MachineFunction &MF = DAG.getMachineFunction();
3582 SystemZMachineFunctionInfo *FuncInfo =
3583 MF.getInfo<SystemZMachineFunctionInfo>();
3584 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3585
3586 SDValue Chain = Op.getOperand(0);
3587 SDValue Addr = Op.getOperand(1);
3588 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3589 SDLoc DL(Op);
3590
3591 // The initial values of each field.
3592 const unsigned NumFields = 4;
3593 SDValue Fields[NumFields] = {
3594 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3595 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3596 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3597 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3598 };
3599
3600 // Store each field into its respective slot.
3601 SDValue MemOps[NumFields];
3602 unsigned Offset = 0;
3603 for (unsigned I = 0; I < NumFields; ++I) {
3604 SDValue FieldAddr = Addr;
3605 if (Offset != 0)
3606 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3607 DAG.getIntPtrConstant(Offset, DL));
3608 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3609 MachinePointerInfo(SV, Offset));
3610 Offset += 8;
3611 }
3612 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3613}
3614
3615SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3616 SelectionDAG &DAG) const {
3617 SDValue Chain = Op.getOperand(0);
3618 SDValue DstPtr = Op.getOperand(1);
3619 SDValue SrcPtr = Op.getOperand(2);
3620 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3621 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3622 SDLoc DL(Op);
3623
3624 uint32_t Sz =
3625 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
3626 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
3627 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3628 /*isTailCall*/ false, MachinePointerInfo(DstSV),
3629 MachinePointerInfo(SrcSV));
3630}
3631
3632SDValue
3633SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
3634 SelectionDAG &DAG) const {
3635 if (Subtarget.isTargetXPLINK64())
3636 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
3637 else
3638 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
3639}
3640
3641SDValue
3642SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
3643 SelectionDAG &DAG) const {
3644 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3645 MachineFunction &MF = DAG.getMachineFunction();
3646 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3647 SDValue Chain = Op.getOperand(0);
3648 SDValue Size = Op.getOperand(1);
3649 SDValue Align = Op.getOperand(2);
3650 SDLoc DL(Op);
3651
3652 // If the user has set the no-realign-stack function attribute, ignore
3653 // alloca alignments.
3654 uint64_t AlignVal =
3655 (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3656
3657 uint64_t StackAlign = TFI->getStackAlignment();
3658 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3659 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3660
3661 SDValue NeededSpace = Size;
3662
3663 // Add extra space for alignment if needed.
3664 EVT PtrVT = getPointerTy(MF.getDataLayout());
3665 if (ExtraAlignSpace)
3666 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
3667 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3668
3669 bool IsSigned = false;
3670 bool DoesNotReturn = false;
3671 bool IsReturnValueUsed = false;
3672 EVT VT = Op.getValueType();
3673 SDValue AllocaCall =
3674 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, makeArrayRef(NeededSpace),
3675 CallingConv::C, IsSigned, DL, DoesNotReturn,
3676 IsReturnValueUsed)
3677 .first;
3678
3679 // Perform a CopyFromReg from %GPR4 (the stack pointer register), chained
3680 // and glued to the end of the call so that it isn't separated from the
3681 // call sequence.
3682 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
3683 Register SPReg = Regs.getStackPointerRegister();
3684 Chain = AllocaCall.getValue(1);
3685 SDValue Glue = AllocaCall.getValue(2);
3686 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
3687 Chain = NewSPRegNode.getValue(1);
3688
3689 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
3690 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
3691 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
3692
3693 // Dynamically realign if needed.
3694 if (ExtraAlignSpace) {
3695 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3696 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3697 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
3698 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
3699 }
3700
3701 SDValue Ops[2] = {Result, Chain};
3702 return DAG.getMergeValues(Ops, DL);
3703}
3704
3705SDValue
3706SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
3707 SelectionDAG &DAG) const {
3708 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3709 MachineFunction &MF = DAG.getMachineFunction();
3710 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3711 bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3712
3713 SDValue Chain = Op.getOperand(0);
3714 SDValue Size = Op.getOperand(1);
3715 SDValue Align = Op.getOperand(2);
3716 SDLoc DL(Op);
3717
3718 // If the user has set the no-realign-stack function attribute, ignore
3719 // alloca alignments.
3720 uint64_t AlignVal =
3721 (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3722
3723 uint64_t StackAlign = TFI->getStackAlignment();
3724 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3725 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3726
3727 Register SPReg = getStackPointerRegisterToSaveRestore();
3728 SDValue NeededSpace = Size;
3729
3730 // Get a reference to the stack pointer.
3731 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3732
3733 // If we need a backchain, save it now.
3734 SDValue Backchain;
3735 if (StoreBackchain)
3736 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
3737 MachinePointerInfo());
3738
3739 // Add extra space for alignment if needed.
3740 if (ExtraAlignSpace)
3741 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3742 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3743
3744 // Get the new stack pointer value.
3745 SDValue NewSP;
3746 if (hasInlineStackProbe(MF)) {
3747 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
3748 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
3749 Chain = NewSP.getValue(1);
3750 }
3751 else {
3752 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
3753 // Copy the new stack pointer back.
3754 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3755 }
3756
3757 // The allocated data lives above the 160 bytes allocated for the standard
3758 // frame, plus any outgoing stack arguments. We don't know how much that
3759 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3760 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3761 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3762
3763 // Dynamically realign if needed.
3764 if (RequiredAlign > StackAlign) {
3765 Result =
3766 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3767 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3768 Result =
3769 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3770 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3771 }
3772
3773 if (StoreBackchain)
3774 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
3775 MachinePointerInfo());
3776
3777 SDValue Ops[2] = { Result, Chain };
3778 return DAG.getMergeValues(Ops, DL);
3779}
3780
3781SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3782 SDValue Op, SelectionDAG &DAG) const {
3783 SDLoc DL(Op);
3784
3785 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3786}
3787
3788SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
3789 SelectionDAG &DAG) const {
3790 EVT VT = Op.getValueType();
3791 SDLoc DL(Op);
3792 SDValue Ops[2];
3793 if (is32Bit(VT))
3794 // Just do a normal 64-bit multiplication and extract the results.
3795 // We define this so that it can be used for constant division.
3796 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
3797 Op.getOperand(1), Ops[1], Ops[0]);
3798 else if (Subtarget.hasMiscellaneousExtensions2())
3799 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3800 // the high result in the even register. ISD::SMUL_LOHI is defined to
3801 // return the low half first, so the results are in reverse order.
3802 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
3803 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3804 else {
3805 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3806 //
3807 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3808 //
3809 // but using the fact that the upper halves are either all zeros
3810 // or all ones:
3811 //
3812 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3813 //
3814 // and grouping the two rightmost terms together, since they are quicker
3815 // to compute than the multiplication:
3816 //
3817 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
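// Annotation, not part of the original source: the products can be
// replaced by ANDs because lh and rh are sign replications of ll and rl
// (each is either 0 or all ones, from the SRA by 63 below). If lh == -1
// then lh * rl == -rl while lh & rl == rl, so (lh * rl) << 64 ==
// -((lh & rl) << 64); if lh == 0, both terms vanish. The same argument
// applies to the (ll, rh) pair.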
3818 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
3819 SDValue LL = Op.getOperand(0);
3820 SDValue RL = Op.getOperand(1);
3821 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
3822 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
3823 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3824 // the high result in the even register. ISD::SMUL_LOHI is defined to
3825 // return the low half first, so the results are in reverse order.
3826 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3827 LL, RL, Ops[1], Ops[0]);
3828 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
3829 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
3830 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
3831 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
3832 }
3833 return DAG.getMergeValues(Ops, DL);
3834}
3835
3836SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3837 SelectionDAG &DAG) const {
3838 EVT VT = Op.getValueType();
3839 SDLoc DL(Op);
3840 SDValue Ops[2];
3841 if (is32Bit(VT))
3842 // Just do a normal 64-bit multiplication and extract the results.
3843 // We define this so that it can be used for constant division.
3844 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3845 Op.getOperand(1), Ops[1], Ops[0]);
3846 else
3847 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3848 // the high result in the even register. ISD::UMUL_LOHI is defined to
3849 // return the low half first, so the results are in reverse order.
3850 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3851 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3852 return DAG.getMergeValues(Ops, DL);
3853}
3854
3855SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3856 SelectionDAG &DAG) const {
3857 SDValue Op0 = Op.getOperand(0);
3858 SDValue Op1 = Op.getOperand(1);
3859 EVT VT = Op.getValueType();
3860 SDLoc DL(Op);
3861
3862 // We use DSGF for 32-bit division. This means the first operand must
3863 // always be 64-bit, and the second operand should be 32-bit whenever
3864 // that is possible, to improve performance.
3865 if (is32Bit(VT))
3866 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3867 else if (DAG.ComputeNumSignBits(Op1) > 32)
3868 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3869
3870 // DSG(F) returns the remainder in the even register and the
3871 // quotient in the odd register.
3872 SDValue Ops[2];
3873 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
3874 return DAG.getMergeValues(Ops, DL);
3875}
3876
3877SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3878 SelectionDAG &DAG) const {
3879 EVT VT = Op.getValueType();
3880 SDLoc DL(Op);
3881
3882 // DL(G) returns the remainder in the even register and the
3883 // quotient in the odd register.
3884 SDValue Ops[2];
3885 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
3886 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3887 return DAG.getMergeValues(Ops, DL);
3888}
3889
3890SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3891 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3892
3893 // Get the known-zero masks for each operand.
3894 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
3895 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
3896 DAG.computeKnownBits(Ops[1])};
3897
3898 // See if the upper 32 bits of one operand and the lower 32 bits of the
3899 // other are known zero. They are the low and high operands respectively.
3900 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
3901 Known[1].Zero.getZExtValue() };
3902 unsigned High, Low;
3903 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3904 High = 1, Low = 0;
3905 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3906 High = 0, Low = 1;
3907 else
3908 return Op;
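// Annotation, not part of the original source: e.g. if Ops[0] has
// known-zero mask 0xffffffff00000000 (its high word is zero) and Ops[1]
// has known-zero mask 0x00000000ffffffff (its low word is zero), then the
// OR merges the low half from Ops[0] with the high half from Ops[1], so
// High = 1 and Low = 0.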
3909
3910 SDValue LowOp = Ops[Low];
3911 SDValue HighOp = Ops[High];
3912
3913 // If the high part is a constant, we're better off using IILH.
3914 if (HighOp.getOpcode() == ISD::Constant)
3915 return Op;
3916
3917 // If the low part is a constant that is outside the range of LHI,
3918 // then we're better off using IILF.
3919 if (LowOp.getOpcode() == ISD::Constant) {
3920 int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3921 if (!isInt<16>(Value))
3922 return Op;
3923 }
3924
3925 // Check whether the high part is an AND that doesn't change the
3926 // high 32 bits and just masks out low bits. We can skip it if so.
3927 if (HighOp.getOpcode() == ISD::AND &&
3928 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3929 SDValue HighOp0 = HighOp.getOperand(0);
3930 uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3931 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3932 HighOp = HighOp0;
3933 }
3934
3935 // Take advantage of the fact that all GR32 operations only change the
3936 // low 32 bits by truncating Low to an i32 and inserting it directly
3937 // using a subreg. The interesting cases are those where the truncation
3938 // can be folded.
3939 SDLoc DL(Op);
3940 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3941 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3942 MVT::i64, HighOp, Low32);
3943}
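
// A minimal sketch of the known-zero classification above (assuming
// <cstdint>; classifyOrHalves is a hypothetical helper). An operand can
// supply the low half when its upper 32 bits are known zero, and the high
// half when its lower 32 bits are known zero.
static bool classifyOrHalves(uint64_t KnownZero0, uint64_t KnownZero1,
                             unsigned &High, unsigned &Low) {
  if ((KnownZero0 >> 32) == 0xffffffff && uint32_t(KnownZero1) == 0xffffffff) {
    High = 1; Low = 0; // operand 0 supplies the low half
    return true;
  }
  if ((KnownZero1 >> 32) == 0xffffffff && uint32_t(KnownZero0) == 0xffffffff) {
    High = 0; Low = 1; // operand 1 supplies the low half
    return true;
  }
  return false; // fall back to a plain 64-bit OR
}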
3944
3945// Lower SADDO/SSUBO/UADDO/USUBO nodes.
3946SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
3947 SelectionDAG &DAG) const {
3948 SDNode *N = Op.getNode();
3949 SDValue LHS = N->getOperand(0);
3950 SDValue RHS = N->getOperand(1);
3951 SDLoc DL(N);
3952 unsigned BaseOp = 0;
3953 unsigned CCValid = 0;
3954 unsigned CCMask = 0;
3955
3956 switch (Op.getOpcode()) {
3957 default: llvm_unreachable("Unknown instruction!");
3958 case ISD::SADDO:
3959 BaseOp = SystemZISD::SADDO;
3960 CCValid = SystemZ::CCMASK_ARITH;
3961 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3962 break;
3963 case ISD::SSUBO:
3964 BaseOp = SystemZISD::SSUBO;
3965 CCValid = SystemZ::CCMASK_ARITH;
3966 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3967 break;
3968 case ISD::UADDO:
3969 BaseOp = SystemZISD::UADDO;
3970 CCValid = SystemZ::CCMASK_LOGICAL;
3971 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3972 break;
3973 case ISD::USUBO:
3974 BaseOp = SystemZISD::USUBO;
3975 CCValid = SystemZ::CCMASK_LOGICAL;
3976 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3977 break;
3978 }
3979
3980 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
3981 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
3982
3983 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3984 if (N->getValueType(1) == MVT::i1)
3985 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3986
3987 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3988}
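
// A minimal scalar sketch of the semantics being lowered (assuming
// <cstdint>; both helpers are hypothetical). Each *ALUO node returns the
// result plus an overflow flag, which SystemZ recovers from the condition
// code set by the operation itself.
static bool uaddOverflow(uint32_t A, uint32_t B, uint32_t &Sum) {
  Sum = A + B;
  return Sum < A; // carry out == unsigned overflow
}
static bool saddOverflow(int32_t A, int32_t B, int32_t &Sum) {
  int64_t Wide = int64_t(A) + int64_t(B); // exact in 64 bits
  Sum = int32_t(Wide);                    // truncate to 32 bits
  return Wide != Sum;                     // lost information == overflow
}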
3989
3990static bool isAddCarryChain(SDValue Carry) {
3991 while (Carry.getOpcode() == ISD::ADDCARRY)
3992 Carry = Carry.getOperand(2);
3993 return Carry.getOpcode() == ISD::UADDO;
3994}
3995
3996static bool isSubBorrowChain(SDValue Carry) {
3997 while (Carry.getOpcode() == ISD::SUBCARRY)
3998 Carry = Carry.getOperand(2);
3999 return Carry.getOpcode() == ISD::USUBO;
4000}
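
// A minimal sketch of the pattern these helpers look for (assuming
// <cstdint>; add128 is a hypothetical example). A wide add is split into a
// UADDO leg that produces the first carry and ADDCARRY legs that consume
// it, so walking operand 2 of each ADDCARRY leads back to a UADDO root.
static void add128(uint64_t AL, uint64_t AH, uint64_t BL, uint64_t BH,
                   uint64_t &Lo, uint64_t &Hi) {
  Lo = AL + BL;             // the UADDO leg
  uint64_t Carry = Lo < AL; // its carry-out
  Hi = AH + BH + Carry;     // the ADDCARRY leg consumes the carry
}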
4001
4002// Lower ADDCARRY/SUBCARRY nodes.
4003SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
4004 SelectionDAG &DAG) const {
4005
4006 SDNode *N = Op.getNode();
4007 MVT VT = N->getSimpleValueType(0);
4008
4009 // Let legalize expand this if it isn't a legal type yet.
4010 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4011 return SDValue();
4012
4013 SDValue LHS = N->getOperand(0);
4014 SDValue RHS = N->getOperand(1);
4015 SDValue Carry = Op.getOperand(2);
4016 SDLoc DL(N);
4017 unsigned BaseOp = 0;
4018 unsigned CCValid = 0;
4019 unsigned CCMask = 0;
4020
4021 switch (Op.getOpcode()) {
4022 default: llvm_unreachable("Unknown instruction!");
4023 case ISD::ADDCARRY:
4024 if (!isAddCarryChain(Carry))
4025 return SDValue();
4026
4027 BaseOp = SystemZISD::ADDCARRY;
4028 CCValid = SystemZ::CCMASK_LOGICAL;
4029 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4030 break;
4031 case ISD::SUBCARRY:
4032 if (!isSubBorrowChain(Carry))
4033 return SDValue();
4034
4035 BaseOp = SystemZISD::SUBCARRY;
4036 CCValid = SystemZ::CCMASK_LOGICAL;
4037 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4038 break;
4039 }
4040
4041 // Set the condition code from the carry flag.
4042 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4043 DAG.getConstant(CCValid, DL, MVT::i32),
4044 DAG.getConstant(CCMask, DL, MVT::i32));
4045
4046 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4047 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4048
4049 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4050 if (N->getValueType(1) == MVT::i1)
4051 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4052
4053 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4054}
4055
4056SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4057 SelectionDAG &DAG) const {
4058 EVT VT = Op.getValueType();
4059 SDLoc DL(Op);
4060 Op = Op.getOperand(0);
4061
4062 // Handle vector types via VPOPCT.
4063 if (VT.isVector()) {
4064 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4065 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4066 switch (VT.getScalarSizeInBits()) {
4067 case 8:
4068 break;
4069 case 16: {
4070 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4071 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4072 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4073 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4074 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4075 break;
4076 }
4077 case 32: {
4078 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4079 DAG.getConstant(0, DL, MVT::i32));
4080 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4081 break;
4082 }
4083 case 64: {
4084 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4085 DAG.getConstant(0, DL, MVT::i32));
4086 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4087 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4088 break;
4089 }
4090 default:
4091 llvm_unreachable("Unexpected type");
4092 }
4093 return Op;
4094 }
4095
4096 // Get the known-zero mask for the operand.
4097 KnownBits Known = DAG.computeKnownBits(Op);
4098 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
4099 if (NumSignificantBits == 0)
4100 return DAG.getConstant(0, DL, VT);
4101
4102 // Skip known-zero high parts of the operand.
4103 int64_t OrigBitSize = VT.getSizeInBits();
4104 int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
4105 BitSize = std::min(BitSize, OrigBitSize);
4106
4107 // The POPCNT instruction counts the number of bits in each byte.
4108 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
4109 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
4110 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4111
4112 // Add up per-byte counts in a binary tree. All bits of Op at
4113 // position larger than BitSize remain zero throughout.
4114 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
4115 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
4116 if (BitSize != OrigBitSize)
4117 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
4118 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
4119 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4120 }
4121
4122 // Extract overall result from high byte.
4123 if (BitSize > 8)
4124 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4125 DAG.getConstant(BitSize - 8, DL, VT));
4126
4127 return Op;
4128}
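
// A minimal scalar model of the 64-bit case above (assuming <cstdint>;
// both helpers are hypothetical). POPCNT leaves one per-byte bit count in
// each byte; adding shifted copies accumulates the total into the top
// byte, which the final shift right by BitSize - 8 extracts.
static uint64_t modelPopcnt(uint64_t X) {
  uint64_t R = 0;
  for (unsigned Byte = 0; Byte < 8; ++Byte) {
    uint64_t V = (X >> (8 * Byte)) & 0xff;
    unsigned C = 0;
    for (; V; V &= V - 1) // clear the lowest set bit
      ++C;
    R |= uint64_t(C) << (8 * Byte);
  }
  return R;
}
static unsigned treeCtpop64(uint64_t X) {
  uint64_t Op = modelPopcnt(X);
  for (unsigned I = 32; I >= 8; I /= 2)
    Op += Op << I;           // add up per-byte counts pairwise
  return unsigned(Op >> 56); // the total sits in the high byte
}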
4129
4130SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4131 SelectionDAG &DAG) const {
4132 SDLoc DL(Op);
4133 AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
4134 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
4135 SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
4136 cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
4137
4138 // The only fence that needs an instruction is a sequentially-consistent
4139 // cross-thread fence.
4140 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4141 FenceSSID == SyncScope::System) {
4142 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
4143 Op.getOperand(0)),
4144 0);
4145 }
4146
4147 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4148 return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4149}
4150
4151// Op is an atomic load. Lower it into a normal volatile load.
4152SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
4153 SelectionDAG &DAG) const {
4154 auto *Node = cast<AtomicSDNode>(Op.getNode());
4155 return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
4156 Node->getChain(), Node->getBasePtr(),
4157 Node->getMemoryVT(), Node->getMemOperand());
4158}
4159
4160// Op is an atomic store. Lower it into a normal volatile store.
4161SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
4162 SelectionDAG &DAG) const {
4163 auto *Node = cast<AtomicSDNode>(Op.getNode());
4164 SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
4165 Node->getBasePtr(), Node->getMemoryVT(),
4166 Node->getMemOperand());
4167 // We have to enforce sequential consistency by performing a
4168 // serialization operation after the store.
4169 if (Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)
4170 Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
4171 MVT::Other, Chain), 0);
4172 return Chain;
4173}
4174
4175 // Op is an 8-, 16- or 32-bit ATOMIC_LOAD_* operation. Lower the 8- and
4176 // 16-bit cases into the fullword ATOMIC_LOADW_* operation given by Opcode.
4177SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
4178 SelectionDAG &DAG,
4179 unsigned Opcode) const {
4180 auto *Node = cast<AtomicSDNode>(Op.getNode());
4181
4182 // 32-bit operations need no code outside the main loop.
4183 EVT NarrowVT = Node->getMemoryVT();
4184 EVT WideVT = MVT::i32;
4185 if (NarrowVT == WideVT)
4186 return Op;
4187
4188 int64_t BitSize = NarrowVT.getSizeInBits();
4189 SDValue ChainIn = Node->getChain();
4190 SDValue Addr = Node->getBasePtr();
4191 SDValue Src2 = Node->getVal();
4192 MachineMemOperand *MMO = Node->getMemOperand();
4193 SDLoc DL(Node);
4194 EVT PtrVT = Addr.getValueType();
4195
4196 // Convert atomic subtracts of constants into additions.
4197 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4198 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4199 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
4200 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
4201 }
4202
4203 // Get the address of the containing word.
4204 SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4205 DAG.getConstant(-4, DL, PtrVT));
4206
4207 // Get the number of bits that the word must be rotated left in order
4208 // to bring the field to the top bits of a GR32.
4209 SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4210 DAG.getConstant(3, DL, PtrVT));
4211 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4212
4213 // Get the complementing shift amount, for rotating a field in the top
4214 // bits back to its proper position.
4215 SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4216 DAG.getConstant(0, DL, WideVT), BitShift);
4217
4218 // Extend the source operand to 32 bits and prepare it for the inner loop.
4219 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
4220 // operations require the source to be shifted in advance. (This shift
4221 // can be folded if the source is constant.) For AND and NAND, the lower
4222 // bits must be set, while for other opcodes they should be left clear.
4223 if (Opcode != SystemZISD::ATOMIC_SWAPW)
4224 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
4225 DAG.getConstant(32 - BitSize, DL, WideVT));
4226 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
4227 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
4228 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
4229 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
4230
4231 // Construct the ATOMIC_LOADW_* node.
4232 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
4233 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4234 DAG.getConstant(BitSize, DL, WideVT) };
4235 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4236 NarrowVT, MMO);
4237
4238 // Rotate the result of the final CS so that the field is in the lower
4239 // bits of a GR32, then truncate it.
4240 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
4241 DAG.getConstant(BitSize, DL, WideVT));
4242 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
4243
4244 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
4245 return DAG.getMergeValues(RetOps, DL);
4246}
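
// A minimal sketch of the address arithmetic above (assuming <cstdint>;
// locateSubword is a hypothetical helper). The DAG code computes BitShift
// as Addr << 3 truncated to i32; only the low five bits matter to the
// 32-bit rotates, which is the mod-32 view used here.
struct SubwordPos { uint64_t AlignedAddr; unsigned BitShift, NegBitShift; };
static SubwordPos locateSubword(uint64_t Addr) {
  SubwordPos P;
  P.AlignedAddr = Addr & ~uint64_t(3);    // word containing the field
  P.BitShift = (unsigned(Addr) & 3) * 8;  // rotate left to top of GR32
  P.NegBitShift = (32 - P.BitShift) & 31; // rotate back
  return P;
}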
4247
4248// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations
4249// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
4250// operations into additions.
4251SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
4252 SelectionDAG &DAG) const {
4253 auto *Node = cast<AtomicSDNode>(Op.getNode());
4254 EVT MemVT = Node->getMemoryVT();
4255 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
4256 // A full-width operation.
4257 assert(Op.getValueType() == MemVT && "Mismatched VTs");
4258 SDValue Src2 = Node->getVal();
4259 SDValue NegSrc2;
4260 SDLoc DL(Src2);
4261
4262 if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
4263 // Use an addition if the operand is constant and either LAA(G) is
4264 // available or the negative value is in the range of A(G)FHI.
4265 int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
4266 if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
4267 NegSrc2 = DAG.getConstant(Value, DL, MemVT);
4268 } else if (Subtarget.hasInterlockedAccess1())
4269 // Use LAA(G) if available.
4270 NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
4271 Src2);
4272
4273 if (NegSrc2.getNode())
4274 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
4275 Node->getChain(), Node->getBasePtr(), NegSrc2,
4276 Node->getMemOperand());
4277
4278 // Use the node as-is.
4279 return Op;
4280 }
4281
4282 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
4283}
4284
4285// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
4286SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
4287 SelectionDAG &DAG) const {
4288 auto *Node = cast<AtomicSDNode>(Op.getNode());
4289 SDValue ChainIn = Node->getOperand(0);
4290 SDValue Addr = Node->getOperand(1);
4291 SDValue CmpVal = Node->getOperand(2);
4292 SDValue SwapVal = Node->getOperand(3);
4293 MachineMemOperand *MMO = Node->getMemOperand();
4294 SDLoc DL(Node);
4295
4296 // We have native support for 32-bit and 64-bit compare and swap, but we
4297 // still need to expand extracting the "success" result from the CC.
4298 EVT NarrowVT = Node->getMemoryVT();
4299 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4300 if (NarrowVT == WideVT) {
4301 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4302 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
4303 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
4304 DL, Tys, Ops, NarrowVT, MMO);
4305 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4306 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
4307
4308 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4309 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4310 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4311 return SDValue();
4312 }
4313
4314 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4315 // via a fullword ATOMIC_CMP_SWAPW operation.
4316 int64_t BitSize = NarrowVT.getSizeInBits();
4317 EVT PtrVT = Addr.getValueType();
4318
4319 // Get the address of the containing word.
4320 SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4321 DAG.getConstant(-4, DL, PtrVT));
4322
4323 // Get the number of bits that the word must be rotated left in order
4324 // to bring the field to the top bits of a GR32.
4325 SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4326 DAG.getConstant(3, DL, PtrVT));
4327 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4328
4329 // Get the complementing shift amount, for rotating a field in the top
4330 // bits back to its proper position.
4331 SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4332 DAG.getConstant(0, DL, WideVT), BitShift);
4333
4334 // Construct the ATOMIC_CMP_SWAPW node.
4335 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4336 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
4337 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
4338 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
4339 VTList, Ops, NarrowVT, MMO);
4340 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4341 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
4342
4343 // emitAtomicCmpSwapW() will zero extend the result (original value).
4344 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
4345 DAG.getValueType(NarrowVT));
4346 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
4347 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4348 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4349 return SDValue();
4350}
4351
4352MachineMemOperand::Flags
4353SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
4354 // Because of how we convert atomic_load and atomic_store to normal loads and
4355 // stores in the DAG, we need to ensure that the MMOs are marked volatile,
4356 // since DAGCombine hasn't been updated to account for atomic but
4357 // non-volatile loads. (See D57601)
4358 if (auto *SI = dyn_cast<StoreInst>(&I))
4359 if (SI->isAtomic())
4360 return MachineMemOperand::MOVolatile;
4361 if (auto *LI = dyn_cast<LoadInst>(&I))
4362 if (LI->isAtomic())
4363 return MachineMemOperand::MOVolatile;
4364 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
4365 if (AI->isAtomic())
4366 return MachineMemOperand::MOVolatile;
4367 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
4368 if (AI->isAtomic())
4369 return MachineMemOperand::MOVolatile;
4370 return MachineMemOperand::MONone;
4371}
4372
4373SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
4374 SelectionDAG &DAG) const {
4375 MachineFunction &MF = DAG.getMachineFunction();
4376 const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
4377 auto *Regs = Subtarget->getSpecialRegisters();
4378 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4379 report_fatal_error("Variable-sized stack allocations are not supported "
4380 "in GHC calling convention");
4381 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
4382 Regs->getStackPointerRegister(), Op.getValueType());
4383}
4384
4385SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
4386 SelectionDAG &DAG) const {
4387 MachineFunction &MF = DAG.getMachineFunction();
4388 const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
4389 auto *Regs = Subtarget->getSpecialRegisters();
4390 bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
4391
4392 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4393 report_fatal_error("Variable-sized stack allocations are not supported "
4394 "in GHC calling convention");
4395
4396 SDValue Chain = Op.getOperand(0);
4397 SDValue NewSP = Op.getOperand(1);
4398 SDValue Backchain;
4399 SDLoc DL(Op);
4400
4401 if (StoreBackchain) {
4402 SDValue OldSP = DAG.getCopyFromReg(
4403 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
4404 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4405 MachinePointerInfo());
4406 }
4407
4408 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
4409
4410 if (StoreBackchain)
4411 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4412 MachinePointerInfo());
4413
4414 return Chain;
4415}
4416
4417SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
4418 SelectionDAG &DAG) const {
4419 bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
4420 if (!IsData)
4421 // Just preserve the chain.
4422 return Op.getOperand(0);
4423
4424 SDLoc DL(Op);
4425 bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
4426 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
4427 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
4428 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
4429 Op.getOperand(1)};
4430 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
4431 Node->getVTList(), Ops,
4432 Node->getMemoryVT(), Node->getMemOperand());
4433}
4434
4435// Convert condition code in CCReg to an i32 value.
4436static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
4437 SDLoc DL(CCReg);
4438 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
4439 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
4440 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
4441}
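
// A minimal sketch of the extraction above, assuming IPM leaves the 2-bit
// condition code at bit position SystemZ::IPM_CC (28) of the i32 result
// with zeros above it; extractCC is a hypothetical helper.
static unsigned extractCC(unsigned IpmResult) {
  const unsigned IPM_CC = 28; // assumed value of SystemZ::IPM_CC
  return IpmResult >> IPM_CC; // CC as a value in the range 0..3
}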
4442
4443SDValue
4444SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4445 SelectionDAG &DAG) const {
4446 unsigned Opcode, CCValid;
4447 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
4448 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
4449 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
4450 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
4451 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
4452 return SDValue();
4453 }
4454
4455 return SDValue();
4456}
4457
4458SDValue
4459SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4460 SelectionDAG &DAG) const {
4461 unsigned Opcode, CCValid;
4462 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
4463 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
4464 if (Op->getNumValues() == 1)
4465 return getCCResult(DAG, SDValue(Node, 0));
4466 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
4467 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
4468 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
4469 }
4470
4471 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4472 switch (Id) {
4473 case Intrinsic::thread_pointer:
4474 return lowerThreadPointer(SDLoc(Op), DAG);
4475
4476 case Intrinsic::s390_vpdi:
4477 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
4478 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4479
4480 case Intrinsic::s390_vperm:
4481 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
4482 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4483
4484 case Intrinsic::s390_vuphb:
4485 case Intrinsic::s390_vuphh:
4486 case Intrinsic::s390_vuphf:
4487 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
4488 Op.getOperand(1));
4489
4490 case Intrinsic::s390_vuplhb:
4491 case Intrinsic::s390_vuplhh:
4492 case Intrinsic::s390_vuplhf:
4493 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
4494 Op.getOperand(1));
4495
4496 case Intrinsic::s390_vuplb:
4497 case Intrinsic::s390_vuplhw:
4498 case Intrinsic::s390_vuplf:
4499 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
4500 Op.getOperand(1));
4501
4502 case Intrinsic::s390_vupllb:
4503 case Intrinsic::s390_vupllh:
4504 case Intrinsic::s390_vupllf:
4505 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
4506 Op.getOperand(1));
4507
4508 case Intrinsic::s390_vsumb:
4509 case Intrinsic::s390_vsumh:
4510 case Intrinsic::s390_vsumgh:
4511 case Intrinsic::s390_vsumgf:
4512 case Intrinsic::s390_vsumqf:
4513 case Intrinsic::s390_vsumqg:
4514 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
4515 Op.getOperand(1), Op.getOperand(2));
4516 }
4517
4518 return SDValue();
4519}
4520
4521namespace {
4522// Says that SystemZISD operation Opcode can be used to perform the equivalent
4523// of a VPERM with permute vector Bytes. If Opcode takes three operands,
4524// Operand is the constant third operand, otherwise it is the number of
4525// bytes in each element of the result.
4526struct Permute {
4527 unsigned Opcode;
4528 unsigned Operand;
4529 unsigned char Bytes[SystemZ::VectorBytes];
4530};
4531}
4532
4533static const Permute PermuteForms[] = {
4534 // VMRHG
4535 { SystemZISD::MERGE_HIGH, 8,
4536 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4537 // VMRHF
4538 { SystemZISD::MERGE_HIGH, 4,
4539 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4540 // VMRHH
4541 { SystemZISD::MERGE_HIGH, 2,
4542 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4543 // VMRHB
4544 { SystemZISD::MERGE_HIGH, 1,
4545 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4546 // VMRLG
4547 { SystemZISD::MERGE_LOW, 8,
4548 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4549 // VMRLF
4550 { SystemZISD::MERGE_LOW, 4,
4551 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4552 // VMRLH
4553 { SystemZISD::MERGE_LOW, 2,
4554 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4555 // VMRLB
4556 { SystemZISD::MERGE_LOW, 1,
4557 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4558 // VPKG
4559 { SystemZISD::PACK, 4,
4560 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4561 // VPKF
4562 { SystemZISD::PACK, 2,
4563 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4564 // VPKH
4565 { SystemZISD::PACK, 1,
4566 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4567 // VPDI V1, V2, 4 (low half of V1, high half of V2)
4568 { SystemZISD::PERMUTE_DWORDS, 4,
4569 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4570 // VPDI V1, V2, 1 (high half of V1, low half of V2)
4571 { SystemZISD::PERMUTE_DWORDS, 1,
4572 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4573};
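
// A small self-check of the selector convention used in this table
// (assuming <cassert>; checkVmrhbForm is hypothetical): selectors 0..15
// index the first operand and 16..31 the second, so the VMRHB entry
// interleaves the high halves of the two operands byte by byte.
static void checkVmrhbForm() {
  const unsigned char VMRHB[16] =
      { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 };
  for (unsigned I = 0; I < 8; ++I) {
    assert(VMRHB[2 * I] == I);          // even bytes from operand 0
    assert(VMRHB[2 * I + 1] == 16 + I); // odd bytes from operand 1
  }
}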
4574
4575// Called after matching a vector shuffle against a particular pattern.
4576// Both the original shuffle and the pattern have two vector operands.
4577// OpNos[0] is the operand of the original shuffle that should be used for
4578// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4579// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
4580// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4581// for operands 0 and 1 of the pattern.
4582static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
4583 if (OpNos[0] < 0) {
4584 if (OpNos[1] < 0)
4585 return false;
4586 OpNo0 = OpNo1 = OpNos[1];
4587 } else if (OpNos[1] < 0) {
4588 OpNo0 = OpNo1 = OpNos[0];
4589 } else {
4590 OpNo0 = OpNos[0];
4591 OpNo1 = OpNos[1];
4592 }
4593 return true;
4594}
4595
4596// Bytes is a VPERM-like permute vector, except that -1 is used for
4597// undefined bytes. Return true if the VPERM can be implemented using P.
4598// When returning true set OpNo0 to the VPERM operand that should be
4599// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
4600//
4601// For example, if swapping the VPERM operands allows P to match, OpNo0
4602// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
4603// operand, but rewriting it to use two duplicated operands allows it to
4604// match P, then OpNo0 and OpNo1 will be the same.
4605static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
4606 unsigned &OpNo0, unsigned &OpNo1) {
4607 int OpNos[] = { -1, -1 };
4608 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4609 int Elt = Bytes[I];
4610 if (Elt >= 0) {
4611 // Make sure that the two permute vectors use the same suboperand
4612 // byte number. Only the operand numbers (the high bits) are
4613 // allowed to differ.
4614 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
4615 return false;
4616 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
4617 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
4618 // Make sure that the operand mappings are consistent with previous
4619 // elements.
4620 if (OpNos[ModelOpNo] == 1 - RealOpNo)
4621 return false;
4622 OpNos[ModelOpNo] = RealOpNo;
4623 }
4624 }
4625 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4626}
4627
4628// As above, but search for a matching permute.
4629static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
4630 unsigned &OpNo0, unsigned &OpNo1) {
4631 for (auto &P : PermuteForms)
4632 if (matchPermute(Bytes, P, OpNo0, OpNo1))
4633 return &P;
4634 return nullptr;
4635}
4636
4637// Bytes is a VPERM-like permute vector, except that -1 is used for
4638// undefined bytes. This permute is an operand of an outer permute.
4639// See whether redistributing the -1 bytes gives a shuffle that can be
4640// implemented using P. If so, set Transform to a VPERM-like permute vector
4641// that, when applied to the result of P, gives the original permute in Bytes.
4642static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4643 const Permute &P,
4644 SmallVectorImpl<int> &Transform) {
4645 unsigned To = 0;
4646 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
4647 int Elt = Bytes[From];
4648 if (Elt < 0)
4649 // Byte number From of the result is undefined.
4650 Transform[From] = -1;
4651 else {
4652 while (P.Bytes[To] != Elt) {
4653 To += 1;
4654 if (To == SystemZ::VectorBytes)
4655 return false;
4656 }
4657 Transform[From] = To;
4658 }
4659 }
4660 return true;
4661}
4662
4663// As above, but search for a matching permute.
4664static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4665 SmallVectorImpl<int> &Transform) {
4666 for (auto &P : PermuteForms)
4667 if (matchDoublePermute(Bytes, P, Transform))
4668 return &P;
4669 return nullptr;
4670}
4671
4672// Convert the mask of the given shuffle op into a byte-level mask,
4673// as if it had type vNi8.
4674static bool getVPermMask(SDValue ShuffleOp,
4675 SmallVectorImpl<int> &Bytes) {
4676 EVT VT = ShuffleOp.getValueType();
4677 unsigned NumElements = VT.getVectorNumElements();
4678 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4679
4680 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
4681 Bytes.resize(NumElements * BytesPerElement, -1);
4682 for (unsigned I = 0; I < NumElements; ++I) {
4683 int Index = VSN->getMaskElt(I);
4684 if (Index >= 0)
4685 for (unsigned J = 0; J < BytesPerElement; ++J)
4686 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4687 }
4688 return true;
4689 }
4690 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
4691 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
4692 unsigned Index = ShuffleOp.getConstantOperandVal(1);
4693 Bytes.resize(NumElements * BytesPerElement, -1);
4694 for (unsigned I = 0; I < NumElements; ++I)
4695 for (unsigned J = 0; J < BytesPerElement; ++J)
4696 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4697 return true;
4698 }
4699 return false;
4700}
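
// A minimal sketch of the expansion above (expandMaskToBytes is a
// hypothetical helper). Element mask entry Index becomes bytes
// Index*BPE .. Index*BPE+BPE-1; an undefined entry (-1) expands to -1 in
// every byte. E.g. a v4i32 mask {1, -1, 4, 6} becomes
// {4,5,6,7, -1,-1,-1,-1, 16,17,18,19, 24,25,26,27}.
static void expandMaskToBytes(const int *EltMask, unsigned NumElts,
                              unsigned BPE, int *Bytes) {
  for (unsigned I = 0; I < NumElts; ++I)
    for (unsigned J = 0; J < BPE; ++J)
      Bytes[I * BPE + J] =
          EltMask[I] < 0 ? -1 : EltMask[I] * int(BPE) + int(J);
}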
4701
4702// Bytes is a VPERM-like permute vector, except that -1 is used for
4703// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
4704// the result come from a contiguous sequence of bytes from one input.
4705// Set Base to the selector for the first byte if so.
4706static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
4707 unsigned BytesPerElement, int &Base) {
4708 Base = -1;
4709 for (unsigned I = 0; I < BytesPerElement; ++I) {
4710 if (Bytes[Start + I] >= 0) {
4711 unsigned Elem = Bytes[Start + I];
4712 if (Base < 0) {
4713 Base = Elem - I;
4714 // Make sure the bytes would come from one input operand.
4715 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
4716 return false;
4717 } else if (unsigned(Base) != Elem - I)
4718 return false;
4719 }
4720 }
4721 return true;
4722}
4723
4724// Bytes is a VPERM-like permute vector, except that -1 is used for
4725// undefined bytes. Return true if it can be performed using VSLDB.
4726// When returning true, set StartIndex to the shift amount and OpNo0
4727// and OpNo1 to the VPERM operands that should be used as the first
4728// and second shift operand respectively.
4729static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
4730 unsigned &StartIndex, unsigned &OpNo0,
4731 unsigned &OpNo1) {
4732 int OpNos[] = { -1, -1 };
4733 int Shift = -1;
4734 for (unsigned I = 0; I < 16; ++I) {
4735 int Index = Bytes[I];
4736 if (Index >= 0) {
4737 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
4738 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
4739 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
4740 if (Shift < 0)
4741 Shift = ExpectedShift;
4742 else if (Shift != ExpectedShift)
4743 return false;
4744 // Make sure that the operand mappings are consistent with previous
4745 // elements.
4746 if (OpNos[ModelOpNo] == 1 - RealOpNo)
4747 return false;
4748 OpNos[ModelOpNo] = RealOpNo;
4749 }
4750 }
4751 StartIndex = Shift;
4752 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4753}
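
// A minimal model of the VSLDB semantics being matched (modelVsldb is a
// hypothetical helper): the two operands are concatenated and 16
// consecutive bytes starting at Shift are taken, so a mask such as
// {3, 4, ..., 18} matches with StartIndex == 3.
static void modelVsldb(const unsigned char Op0[16],
                       const unsigned char Op1[16], unsigned Shift,
                       unsigned char Out[16]) {
  for (unsigned I = 0; I < 16; ++I) {
    unsigned Src = Shift + I; // Shift is in the range 0..15
    Out[I] = Src < 16 ? Op0[Src] : Op1[Src - 16];
  }
}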
4754
4755// Create a node that performs P on operands Op0 and Op1, casting the
4756// operands to the appropriate type. The type of the result is determined by P.
4757static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4758 const Permute &P, SDValue Op0, SDValue Op1) {
4759 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
4760 // elements of a PACK are twice as wide as the outputs.
4761 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
4762 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
4763 P.Operand);
4764 // Cast both operands to the appropriate type.
4765 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
4766 SystemZ::VectorBytes / InBytes);
4767 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
4768 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
4769 SDValue Op;
4770 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
4771 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
4772 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
4773 } else if (P.Opcode == SystemZISD::PACK) {
4774 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
4775 SystemZ::VectorBytes / P.Operand);
4776 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
4777 } else {
4778 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
4779 }
4780 return Op;
4781}
4782
4783static bool isZeroVector(SDValue N) {
4784 if (N->getOpcode() == ISD::BITCAST)
4785 N = N->getOperand(0);
4786 if (N->getOpcode() == ISD::SPLAT_VECTOR)
4787 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
4788 return Op->getZExtValue() == 0;
4789 return ISD::isBuildVectorAllZeros(N.getNode());
4790}
4791
4792// Return the index of the zero/undef vector, or UINT32_MAX if not found.
4793static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
4794 for (unsigned I = 0; I < Num ; I++)
4795 if (isZeroVector(Ops[I]))
4796 return I;
4797 return UINT32_MAX;
4798}
4799
4800// Bytes is a VPERM-like permute vector, except that -1 is used for
4801// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
4802// VSLDB or VPERM.
4803static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4804 SDValue *Ops,
4805 const SmallVectorImpl<int> &Bytes) {
4806 for (unsigned I = 0; I < 2; ++I)
4807 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
4808
4809 // First see whether VSLDB can be used.
4810 unsigned StartIndex, OpNo0, OpNo1;
4811 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
4812 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
4813 Ops[OpNo1],
4814 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
4815
4816 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
4817 // eliminate a zero vector by reusing any zero index in the permute vector.
4818 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
4819 if (ZeroVecIdx != UINT32_MAX) {
4820 bool MaskFirst = true;
4821 int ZeroIdx = -1;
4822 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4823 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
4824 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
4825 if (OpNo == ZeroVecIdx && I == 0) {
4826 // If the first byte is zero, use mask as first operand.
4827 ZeroIdx = 0;
4828 break;
4829 }
4830 if (OpNo != ZeroVecIdx && Byte == 0) {
4831 // If mask contains a zero, use it by placing that vector first.
4832 ZeroIdx = I + SystemZ::VectorBytes;
4833 MaskFirst = false;
4834 break;
4835 }
4836 }
4837 if (ZeroIdx != -1) {
4838 SDValue IndexNodes[SystemZ::VectorBytes];
4839 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4840 if (Bytes[I] >= 0) {
4841 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
4842 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
4843 if (OpNo == ZeroVecIdx)
4844 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
4845 else {
4846 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
4847 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
4848 }
4849 } else
4850 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
4851 }
4852 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
4853 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
4854 if (MaskFirst)
4855 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
4856 Mask);
4857 else
4858 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
4859 Mask);
4860 }
4861 }
4862
4863 SDValue IndexNodes[SystemZ::VectorBytes];
4864 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4865 if (Bytes[I] >= 0)
4866 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
4867 else
4868 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
4869 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
4870 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
4871 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
4872}
4873
4874namespace {
4875// Describes a general N-operand vector shuffle.
4876struct GeneralShuffle {
4877 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
4878 void addUndef();
4879 bool add(SDValue, unsigned);
4880 SDValue getNode(SelectionDAG &, const SDLoc &);
4881 void tryPrepareForUnpack();
4882 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
4883 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
4884
4885 // The operands of the shuffle.
4886 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
4887
4888 // Index I is -1 if byte I of the result is undefined. Otherwise the
4889 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
4890 // Bytes[I] / SystemZ::VectorBytes.
4891 SmallVector<int, SystemZ::VectorBytes> Bytes;
4892
4893 // The type of the shuffle result.
4894 EVT VT;
4895
4896 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
4897 unsigned UnpackFromEltSize;
4898};
4899}
4900
4901// Add an extra undefined element to the shuffle.
4902void GeneralShuffle::addUndef() {
4903 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4904 for (unsigned I = 0; I < BytesPerElement; ++I)
4905 Bytes.push_back(-1);
4906}
4907
4908// Add an extra element to the shuffle, taking it from element Elem of Op.
4909// A null Op indicates a vector input whose value will be calculated later;
4910// there is at most one such input per shuffle and it always has the same
4911// type as the result. Aborts and returns false if the source vector elements
4912// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
4913// LLVM they become implicitly extended, but this is rare and not optimized.
4914bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
4915 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4916
4917 // The source vector can have wider elements than the result,
4918 // either through an explicit TRUNCATE or because of type legalization.
4919 // We want the least significant part.
4920 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
4921 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
4922
4923 // Return false if the source elements are smaller than their destination
4924 // elements.
4925 if (FromBytesPerElement < BytesPerElement)
4926 return false;
4927
4928 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
4929 (FromBytesPerElement - BytesPerElement));
4930
4931 // Look through things like shuffles and bitcasts.
4932 while (Op.getNode()) {
4933 if (Op.getOpcode() == ISD::BITCAST)
4934 Op = Op.getOperand(0);
4935 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
4936 // See whether the bytes we need come from a contiguous part of one
4937 // operand.
4938 SmallVector<int, SystemZ::VectorBytes> OpBytes;
4939 if (!getVPermMask(Op, OpBytes))
4940 break;
4941 int NewByte;
4942 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
4943 break;
4944 if (NewByte < 0) {
4945 addUndef();
4946 return true;
4947 }
4948 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
4949 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
4950 } else if (Op.isUndef()) {
4951 addUndef();
4952 return true;
4953 } else
4954 break;
4955 }
4956
4957 // Make sure that the source of the extraction is in Ops.
4958 unsigned OpNo = 0;
4959 for (; OpNo < Ops.size(); ++OpNo)
4960 if (Ops[OpNo] == Op)
4961 break;
4962 if (OpNo == Ops.size())
4963 Ops.push_back(Op);
4964
4965 // Add the element to Bytes.
4966 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
4967 for (unsigned I = 0; I < BytesPerElement; ++I)
4968 Bytes.push_back(Base + I);
4969
4970 return true;
4971}
4972
4973// Return SDNodes for the completed shuffle.
4974SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
4975 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
4976
4977 if (Ops.size() == 0)
4978 return DAG.getUNDEF(VT);
4979
4980 // Use a single unpack if possible as the last operation.
4981 tryPrepareForUnpack();
4982
4983 // Make sure that there are at least two shuffle operands.
4984 if (Ops.size() == 1)
4985 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
4986
4987 // Create a tree of shuffles, deferring root node until after the loop.
4988 // Try to redistribute the undefined elements of non-root nodes so that
4989 // the non-root shuffles match something like a pack or merge, then adjust
4990 // the parent node's permute vector to compensate for the new order.
4991 // Among other things, this copes with vectors like <2 x i16> that were
4992 // padded with undefined elements during type legalization.
4993 //
4994 // In the best case this redistribution will lead to the whole tree
4995 // using packs and merges. It should rarely be a loss in other cases.
4996 unsigned Stride = 1;
4997 for (; Stride * 2 < Ops.size(); Stride *= 2) {
4998 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
4999 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
5000
5001 // Create a mask for just these two operands.
5002 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
5003 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5004 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
5005 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
5006 if (OpNo == I)
5007 NewBytes[J] = Byte;
5008 else if (OpNo == I + Stride)
5009 NewBytes[J] = SystemZ::VectorBytes + Byte;
5010 else
5011 NewBytes[J] = -1;
5012 }
5013 // See if it would be better to reorganize NewBytes to avoid using VPERM.
5014 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
5015 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
5016 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
5017 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
5018 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5019 if (NewBytes[J] >= 0) {
5020 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
5021 "Invalid double permute");
5022 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
5023 } else
5024 assert(NewBytesMap[J] < 0 && "Invalid double permute");
5025 }
5026 } else {
5027 // Just use NewBytes on the operands.
5028 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
5029 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
5030 if (NewBytes[J] >= 0)
5031 Bytes[J] = I * SystemZ::VectorBytes + J;
5032 }
5033 }
5034 }
5035
5036 // Now we just have 2 inputs. Put the second operand in Ops[1].
5037 if (Stride > 1) {
5038 Ops[1] = Ops[Stride];
5039 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5040 if (Bytes[I] >= int(SystemZ::VectorBytes))
5041 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
5042 }
5043
5044 // Look for an instruction that can do the permute without resorting
5045 // to VPERM.
5046 unsigned OpNo0, OpNo1;
5047 SDValue Op;
5048 if (unpackWasPrepared() && Ops[1].isUndef())
5049 Op = Ops[0];
5050 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
5051 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
5052 else
5053 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
5054
5055 Op = insertUnpackIfPrepared(DAG, DL, Op);
5056
5057 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5058}
5059
5060#ifndef NDEBUG
5061static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
5062 dbgs() << Msg.c_str() << " { ";
5063 for (unsigned i = 0; i < Bytes.size(); i++)
5064 dbgs() << Bytes[i] << " ";
5065 dbgs() << "}\n";
5066}
5067#endif
5068
5069// If the Bytes vector matches an unpack operation, prepare to do the unpack
5070// after all else by removing the zero vector and the effect of the unpack on
5071// Bytes.
5072void GeneralShuffle::tryPrepareForUnpack() {
5073 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
5074 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
5075 return;
5076
5077 // Only do this if removing the zero vector reduces the depth, otherwise
5078 // the critical path will increase with the final unpack.
5079 if (Ops.size() > 2 &&
5080 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
5081 return;
5082
5083 // Find an unpack that would allow removing the zero vector from Ops.
5084 UnpackFromEltSize = 1;
5085 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
5086 bool MatchUnpack = true;
5087 SmallVector<int, SystemZ::VectorBytes> SrcBytes;
5088 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
5089 unsigned ToEltSize = UnpackFromEltSize * 2;
5090 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
5091 if (!IsZextByte)
5092 SrcBytes.push_back(Bytes[Elt]);
5093 if (Bytes[Elt] != -1) {
5094 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
5095 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
5096 MatchUnpack = false;
5097 break;
5098 }
5099 }
5100 }
5101 if (MatchUnpack) {
5102 if (Ops.size() == 2) {
5103 // Don't use unpack if a single source operand needs rearrangement.
5104 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
5105 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
5106 UnpackFromEltSize = UINT_MAX;
5107 return;
5108 }
5109 }
5110 break;
5111 }
5112 }
5113 if (UnpackFromEltSize > 4)
5114 return;
5115
5116 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
5117 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
5118 << ".\n";
5119 dumpBytes(Bytes, "Original Bytes vector:"););
5120
5121 // Apply the unpack in reverse to the Bytes array.
5122 unsigned B = 0;
5123 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
5124 Elt += UnpackFromEltSize;
5125 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
5126 Bytes[B] = Bytes[Elt];
5127 }
5128 while (B < SystemZ::VectorBytes)
5129 Bytes[B++] = -1;
5130
5131 // Remove the zero vector from Ops
5132 Ops.erase(&Ops[ZeroVecOpNo]);
5133 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5134 if (Bytes[I] >= 0) {
5135 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5136 if (OpNo > ZeroVecOpNo)
5137 Bytes[I] -= SystemZ::VectorBytes;
5138 }
5139
5140 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
5141 dbgs() << "\n";);
5142}
5143
5144SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
5145 const SDLoc &DL,
5146 SDValue Op) {
5147 if (!unpackWasPrepared())
5148 return Op;
5149 unsigned InBits = UnpackFromEltSize * 8;
5150 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
5151 SystemZ::VectorBits / InBits);
5152 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
5153 unsigned OutBits = InBits * 2;
5154 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
5155 SystemZ::VectorBits / OutBits);
5156 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
5157}
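
// A minimal scalar sketch of the zero-extending unpack emitted above
// (modelUnpacklHighByte is hypothetical, and assumes element 0 is the
// leftmost lane): UNPACKL_HIGH takes the high (left) half of the source
// elements and widens each one with zero extension, here byte to halfword.
static void modelUnpacklHighByte(const unsigned char In[16],
                                 unsigned short Out[8]) {
  for (unsigned I = 0; I < 8; ++I)
    Out[I] = In[I]; // zero-extend byte I of the left half
}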
5158
5159// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
5160static bool isScalarToVector(SDValue Op) {
5161 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
5162 if (!Op.getOperand(I).isUndef())
5163 return false;
5164 return true;
5165}
5166
5167// Return a vector of type VT that contains Value in the first element.
5168// The other elements don't matter.
5169static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5170 SDValue Value) {
5171 // If we have a constant, replicate it to all elements and let the
5172 // BUILD_VECTOR lowering take care of it.
5173 if (Value.getOpcode() == ISD::Constant ||
5174 Value.getOpcode() == ISD::ConstantFP) {
5175 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
5176 return DAG.getBuildVector(VT, DL, Ops);
5177 }
5178 if (Value.isUndef())
5179 return DAG.getUNDEF(VT);
5180 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
5181}
5182
5183// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
5184// element 1. Used for cases in which replication is cheap.
5185static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5186 SDValue Op0, SDValue Op1) {
5187 if (Op0.isUndef()) {
5188 if (Op1.isUndef())
5189 return DAG.getUNDEF(VT);
5190 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
5191 }
5192 if (Op1.isUndef())
5193 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
5194 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
5195 buildScalarToVector(DAG, DL, VT, Op0),
5196 buildScalarToVector(DAG, DL, VT, Op1));
5197}
5198
5199// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
5200// vector for them.
5201static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
5202 SDValue Op1) {
5203 if (Op0.isUndef() && Op1.isUndef())
5204 return DAG.getUNDEF(MVT::v2i64);
5205 // If one of the two inputs is undefined then replicate the other one,
5206 // in order to avoid using another register unnecessarily.
5207 if (Op0.isUndef())
5208 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5209 else if (Op1.isUndef())
5210 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5211 else {
5212 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5213 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5214 }
5215 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
5216}
5217
5218// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
5219// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
5220// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
5221// would benefit from this representation and return it if so.
5222static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
5223 BuildVectorSDNode *BVN) {
5224 EVT VT = BVN->getValueType(0);
5225 unsigned NumElements = VT.getVectorNumElements();
5226
5227 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
5228 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
5229 // need a BUILD_VECTOR, add an additional placeholder operand for that
5230 // BUILD_VECTOR and store its operands in ResidueOps.
5231 GeneralShuffle GS(VT);
5232 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
5233 bool FoundOne = false;
5234 for (unsigned I = 0; I < NumElements; ++I) {
5235 SDValue Op = BVN->getOperand(I);
5236 if (Op.getOpcode() == ISD::TRUNCATE)
5237 Op = Op.getOperand(0);
5238 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5239 Op.getOperand(1).getOpcode() == ISD::Constant) {
5240 unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
5241 if (!GS.add(Op.getOperand(0), Elem))
5242 return SDValue();
5243 FoundOne = true;
5244 } else if (Op.isUndef()) {
5245 GS.addUndef();
5246 } else {
5247 if (!GS.add(SDValue(), ResidueOps.size()))
5248 return SDValue();
5249 ResidueOps.push_back(BVN->getOperand(I));
5250 }
5251 }
5252
5253 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
5254 if (!FoundOne)
5255 return SDValue();
5256
5257 // Create the BUILD_VECTOR for the remaining elements, if any.
5258 if (!ResidueOps.empty()) {
5259 while (ResidueOps.size() < NumElements)
5260 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
5261 for (auto &Op : GS.Ops) {
5262 if (!Op.getNode()) {
5263 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
5264 break;
5265 }
5266 }
5267 }
5268 return GS.getNode(DAG, SDLoc(BVN));
5269}
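
A sketch of the three-way bucketing tryBuildVectorShuffle performs, using plain tags instead of SDValues (ElemKind and planResidue are illustrative names): extracts feed the shuffle, undefs cost nothing, and every other element gets a slot in the residual BUILD_VECTOR via GS.add(SDValue(), ResidueOps.size()).

#include <cstddef>
#include <vector>

enum class ElemKind { Extract, Undef, Other };

// Illustrative only: counts the elements that still need the residual
// BUILD_VECTOR. Each such element's shuffle source index is simply its
// position in ResidueOps, which is why the placeholder operand works.
static std::size_t planResidue(const std::vector<ElemKind> &Elems) {
  std::size_t ResidueCount = 0;
  for (ElemKind K : Elems)
    if (K == ElemKind::Other)
      ++ResidueCount;
  return ResidueCount; // > 0 means a residual BUILD_VECTOR is emitted
}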
5270
5271bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
5272 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
5273 return true;
5274 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
5275 return true;
5276 return false;
5277}
5278
5279// Combine GPR scalar values Elems into a vector of type VT.
5280SDValue
5281SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5282 SmallVectorImpl<SDValue> &Elems) const {
5283 // See whether there is a single replicated value.
5284 SDValue Single;
5285 unsigned int NumElements = Elems.size();
5286 unsigned int Count = 0;
5287 for (auto Elem : Elems) {
5288 if (!Elem.isUndef()) {
5289 if (!Single.getNode())
5290 Single = Elem;
5291 else if (Elem != Single) {
5292 Single = SDValue();
5293 break;
5294 }
5295 Count += 1;
5296 }
5297 }
5298 // There are three cases here:
5299 //
5300 // - if the only defined element is a loaded one, the best sequence
5301 // is a replicating load.
5302 //
5303 // - otherwise, if the only defined element is an i64 value, we will
5304 // end up with the same VLVGP sequence regardless of whether we short-cut
5305 // for replication or fall through to the later code.
5306 //
5307 // - otherwise, if the only defined element is an i32 or smaller value,
5308 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
5309 // This is only a win if the single defined element is used more than once.
5310 // In other cases we're better off using a single VLVGx.
5311 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
5312 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
5313
5314 // If all elements are loads, use VLREP/VLEs (below).
5315 bool AllLoads = true;
5316 for (auto Elem : Elems)
5317 if (!isVectorElementLoad(Elem)) {
5318 AllLoads = false;
5319 break;
5320 }
5321
5322 // The best way of building a v2i64 from two i64s is to use VLVGP.
5323 if (VT == MVT::v2i64 && !AllLoads)
5324 return joinDwords(DAG, DL, Elems[0], Elems[1]);
5325
5326 // Use a 64-bit merge high to combine two doubles.
5327 if (VT == MVT::v2f64 && !AllLoads)
5328 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5329
5330 // Build v4f32 values directly from the FPRs:
5331 //
5332   //   <Axxx> <Bxxx>   <Cxxx> <Dxxx>
5333   //       V               V          VMRHF
5334   //     <ABxx>          <CDxx>
5335   //            V                     VMRHG
5336   //          <ABCD>
5337 if (VT == MVT::v4f32 && !AllLoads) {
5338 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5339 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
5340 // Avoid unnecessary undefs by reusing the other operand.
5341 if (Op01.isUndef())
5342 Op01 = Op23;
5343 else if (Op23.isUndef())
5344 Op23 = Op01;
5345 // Merging identical replications is a no-op.
5346 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
5347 return Op01;
5348 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
5349 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
5350 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
5351 DL, MVT::v2i64, Op01, Op23);
5352 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5353 }
5354
5355 // Collect the constant terms.
5356 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
5357 SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
5358
5359 unsigned NumConstants = 0;
5360 for (unsigned I = 0; I < NumElements; ++I) {
5361 SDValue Elem = Elems[I];
5362 if (Elem.getOpcode() == ISD::Constant ||
5363 Elem.getOpcode() == ISD::ConstantFP) {
5364 NumConstants += 1;
5365 Constants[I] = Elem;
5366 Done[I] = true;
5367 }
5368 }
5369 // If there was at least one constant, fill in the other elements of
5370 // Constants with undefs to get a full vector constant and use that
5371 // as the starting point.
5372 SDValue Result;
5373 SDValue ReplicatedVal;
5374 if (NumConstants > 0) {
5375 for (unsigned I = 0; I < NumElements; ++I)
5376 if (!Constants[I].getNode())
5377 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
5378 Result = DAG.getBuildVector(VT, DL, Constants);
5379 } else {
5380 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
5381 // avoid a false dependency on any previous contents of the vector
5382 // register.
5383
5384 // Use a VLREP if at least one element is a load. Make sure to replicate
5385 // the load with the most elements having its value.
5386 std::map<const SDNode*, unsigned> UseCounts;
5387 SDNode *LoadMaxUses = nullptr;
5388 for (unsigned I = 0; I < NumElements; ++I)
5389 if (isVectorElementLoad(Elems[I])) {
5390 SDNode *Ld = Elems[I].getNode();
5391 UseCounts[Ld]++;
5392 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
5393 LoadMaxUses = Ld;
5394 }
5395 if (LoadMaxUses != nullptr) {
5396 ReplicatedVal = SDValue(LoadMaxUses, 0);
5397 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
5398 } else {
5399 // Try to use VLVGP.
5400 unsigned I1 = NumElements / 2 - 1;
5401 unsigned I2 = NumElements - 1;
5402 bool Def1 = !Elems[I1].isUndef();
5403 bool Def2 = !Elems[I2].isUndef();
5404 if (Def1 || Def2) {
5405 SDValue Elem1 = Elems[Def1 ? I1 : I2];
5406 SDValue Elem2 = Elems[Def2 ? I2 : I1];
5407 Result = DAG.getNode(ISD::BITCAST, DL, VT,
5408 joinDwords(DAG, DL, Elem1, Elem2));
5409 Done[I1] = true;
5410 Done[I2] = true;
5411 } else
5412 Result = DAG.getUNDEF(VT);
5413 }
5414 }
5415
5416 // Use VLVGx to insert the other elements.
5417 for (unsigned I = 0; I < NumElements; ++I)
5418 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
5419 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
5420 DAG.getConstant(I, DL, MVT::i32));
5421 return Result;
5422}
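
For the VLVGP starting point in buildVector, the two probed indices are always the last element of each doubleword: when the scalars are narrower than i64, the any-extended value lands in the low part of its doubleword on this big-endian target. A quick, self-contained check of the index arithmetic (illustrative only):

#include <cstdio>

// VLVGP fills the two doublewords of a vector from two GPRs. For element
// types narrower than i64, the (any-extended) scalar ends up at indices
// NumElements/2 - 1 and NumElements - 1 on this big-endian target.
int main() {
  for (unsigned NumElements : {2u, 4u, 8u, 16u}) {
    unsigned I1 = NumElements / 2 - 1;
    unsigned I2 = NumElements - 1;
    std::printf("N=%2u -> I1=%2u, I2=%2u\n", NumElements, I1, I2);
    // N= 2 -> I1= 0, I2= 1;  N= 4 -> I1= 1, I2= 3;  and so on.
  }
  return 0;
}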
5423
5424SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
5425 SelectionDAG &DAG) const {
5426 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
5427 SDLoc DL(Op);
5428 EVT VT = Op.getValueType();
5429
5430 if (BVN->isConstant()) {
5431 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
5432 return Op;
5433
5434 // Fall back to loading it from memory.
5435 return SDValue();
5436 }
5437
5438 // See if we should use shuffles to construct the vector from other vectors.
5439 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
5440 return Res;
5441
5442 // Detect SCALAR_TO_VECTOR conversions.
5443 if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
5444 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
5445
5446 // Otherwise use buildVector to build the vector up from GPRs.
5447 unsigned NumElements = Op.getNumOperands();
5448 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
5449 for (unsigned I = 0; I < NumElements; ++I)
5450 Ops[I] = Op.getOperand(I);
5451 return buildVector(DAG, DL, VT, Ops);
5452}
5453
5454SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
5455 SelectionDAG &DAG) const {
5456 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
5457 SDLoc DL(Op);
5458 EVT VT = Op.getValueType();
5459 unsigned NumElements = VT.getVectorNumElements();
5460
5461 if (VSN->isSplat()) {
5462 SDValue Op0 = Op.getOperand(0);
5463 unsigned Index = VSN->getSplatIndex();
5464 assert(Index < VT.getVectorNumElements() &&
5465 "Splat index should be defined and in first operand");
5466 // See whether the value we're splatting is directly available as a scalar.
5467 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5468 Op0.getOpcode() == ISD::BUILD_VECTOR)
5469 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
5470 // Otherwise keep it as a vector-to-vector operation.
5471 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
5472 DAG.getTargetConstant(Index, DL, MVT::i32));
5473 }
5474
5475 GeneralShuffle GS(VT);
5476 for (unsigned I = 0; I < NumElements; ++I) {
5477 int Elt = VSN->getMaskElt(I);
5478 if (Elt < 0)
5479 GS.addUndef();
5480 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
5481 unsigned(Elt) % NumElements))
5482 return SDValue();
5483 }
5484 return GS.getNode(DAG, SDLoc(VSN));
5485}
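
The Elt / NumElements and Elt % NumElements split in lowerVECTOR_SHUFFLE relies on the flattened mask numbering of VECTOR_SHUFFLE: entries 0..N-1 name lanes of operand 0 and entries N..2N-1 name lanes of operand 1. A hypothetical helper making the decode explicit:

#include <utility>

// Illustrative only: decode one non-negative shuffle mask entry into
// (operand number, element within that operand). Negative entries mean
// undef and are handled before this point, as in the loop above.
static std::pair<unsigned, unsigned> decodeMaskElt(int Elt,
                                                   unsigned NumElements) {
  unsigned U = static_cast<unsigned>(Elt);
  return {U / NumElements, U % NumElements};
}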
5486
5487SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
5488 SelectionDAG &DAG) const {
5489 SDLoc DL(Op);
5490 // Just insert the scalar into element 0 of an undefined vector.
5491 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
5492 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
5493 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
5494}
5495
5496SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
5497 SelectionDAG &DAG) const {
5498 // Handle insertions of floating-point values.
5499 SDLoc DL(Op);
5500 SDValue Op0 = Op.getOperand(0);
5501 SDValue Op1 = Op.getOperand(1);
5502 SDValue Op2 = Op.getOperand(2);
5503 EVT VT = Op.getValueType();
5504
5505 // Insertions into constant indices of a v2f64 can be done using VPDI.
5506 // However, if the inserted value is a bitcast or a constant then it's
5507 // better to use GPRs, as below.
5508 if (VT == MVT::v2f64 &&
5509 Op1.getOpcode() != ISD::BITCAST &&
5510 Op1.getOpcode() != ISD::ConstantFP &&
5511 Op2.getOpcode() == ISD::Constant) {
5512 uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
5513 unsigned Mask = VT.getVectorNumElements() - 1;
5514 if (Index <= Mask)
5515 return Op;
5516 }
5517
5518 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
5519 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
5520 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
5521 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
5522 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
5523 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
5524 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5525}
5526
5527SDValue
5528SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
5529 SelectionDAG &DAG) const {
5530 // Handle extractions of floating-point values.
5531 SDLoc DL(Op);
5532 SDValue Op0 = Op.getOperand(0);
5533 SDValue Op1 = Op.getOperand(1);
5534 EVT VT = Op.getValueType();
5535 EVT VecVT = Op0.getValueType();
5536
5537 // Extractions of constant indices can be done directly.
5538 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
5539 uint64_t Index = CIndexN->getZExtValue();
5540 unsigned Mask = VecVT.getVectorNumElements() - 1;
5541 if (Index <= Mask)
5542 return Op;
5543 }
5544
5545 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
5546 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
5547 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
5548 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
5549 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
5550 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5551}
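
lowerINSERT_VECTOR_ELT and lowerEXTRACT_VECTOR_ELT share the same guard: a constant index inside the vector keeps the FP form, anything else detours through the integer twin of the type. A distilled version of that test (inRange is a hypothetical name):

#include <cstdint>

// Illustrative only: the Index <= Mask test used by both FP element
// lowerings. NumElements is a power of two for these vector types, so
// Mask is the largest valid lane index.
static bool inRange(uint64_t Index, unsigned NumElements) {
  unsigned Mask = NumElements - 1;
  return Index <= Mask;
}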
5552
5553SDValue SystemZTargetLowering::
5554lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5555 SDValue PackedOp = Op.getOperand(0);
5556 EVT OutVT = Op.getValueType();
5557 EVT InVT = PackedOp.getValueType();
5558 unsigned ToBits = OutVT.getScalarSizeInBits();
5559 unsigned FromBits = InVT.getScalarSizeInBits();
5560 do {
5561 FromBits *= 2;
5562 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
5563 SystemZ::VectorBits / FromBits);
5564 PackedOp =
5565 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
5566 } while (FromBits != ToBits);
5567 return PackedOp;
5568}
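
The widening loop in lowerSIGN_EXTEND_VECTOR_INREG doubles the element width once per UNPACK_HIGH, so the node count is log2(ToBits / FromBits). A self-contained check of that count (illustrative only, assuming power-of-two widths as for legal vector types):

#include <cassert>
#include <cstdio>

// Same doubling loop as above, counting the UNPACK_HIGH nodes created.
static unsigned unpackSteps(unsigned FromBits, unsigned ToBits) {
  assert(FromBits < ToBits && (FromBits & (FromBits - 1)) == 0 &&
         (ToBits & (ToBits - 1)) == 0 && "expect power-of-two widths");
  unsigned Steps = 0;
  do {
    FromBits *= 2;
    ++Steps;
  } while (FromBits != ToBits);
  return Steps;
}

int main() {
  std::printf("%u\n", unpackSteps(8, 64));  // 3: i8 -> i16 -> i32 -> i64
  std::printf("%u\n", unpackSteps(16, 32)); // 1: i16 -> i32
  return 0;
}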
5569
5570// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
5571SDValue SystemZTargetLowering::
5572lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5573 SDValue PackedOp = Op.getOperand(0);
5574 SDLoc DL(Op);
5575 EVT OutVT = Op.getValueType();
5576 EVT InVT = PackedOp.getValueType();
5577 unsigned InNumElts = InVT.getVectorNumElements();
5578 unsigned OutNumElts = OutVT.getVectorNumElements();
5579 unsigned NumInPerOut = InNumElts / OutNumElts;
5580
5581 SDValue ZeroVec =
5582 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
5583
5584 SmallVector<int, 16> Mask(InNumElts);
5585 unsigned ZeroVecElt = InNumElts;
5586 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
5587 unsigned MaskElt = PackedElt * NumInPerOut;
5588 unsigned End = MaskElt + NumInPerOut - 1;
5589 for (; MaskElt < End; MaskElt++)
5590 Mask[MaskElt] = ZeroVecElt++;
5591 Mask[MaskElt] = PackedElt;
5592 }
5593 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
5594 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
5595}
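
To see the mask lowerZERO_EXTEND_VECTOR_INREG builds, here is the same loop run standalone for one concrete case, v16i8 zero-extended in-register to v4i32 (InNumElts = 16, OutNumElts = 4). Indices of 16 and above select zero bytes from the second shuffle operand, so each i32 lane becomes three zero bytes followed by the source byte, as big-endian zero-extension requires.

#include <cstdio>
#include <vector>

// Illustrative only: recomputes the shuffle mask for v16i8 -> v4i32.
int main() {
  unsigned InNumElts = 16, OutNumElts = 4;
  unsigned NumInPerOut = InNumElts / OutNumElts;
  std::vector<int> Mask(InNumElts);
  unsigned ZeroVecElt = InNumElts;
  for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
    unsigned MaskElt = PackedElt * NumInPerOut;
    unsigned End = MaskElt + NumInPerOut - 1;
    for (; MaskElt < End; MaskElt++)
      Mask[MaskElt] = ZeroVecElt++; // zero bytes in the high-order lanes
    Mask[MaskElt] = PackedElt;      // source byte in the low-order lane
  }
  for (int M : Mask)
    std::printf("%d ", M); // 16 17 18 0 19 20 21 1 22 23 24 2 25 26 27 3
  std::printf("\n");
  return 0;
}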
5596
5597SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
5598 unsigned ByScalar) const {
5599 // Look for cases where a vector shift can use the *_BY_SCALAR form.
5600 SDValue Op0 = Op.getOperand(0);
5601 SDValue Op1 = Op.getOperand(1);
5602 SDLoc DL(Op);
5603 EVT VT = Op.getValueType();
5604 unsigned ElemBitSize = VT.getScalarSizeInBits();
5605
5606 // See whether the shift vector is a splat represented as BUILD_VECTOR.
5607 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
5608 APInt SplatBits, SplatUndef;
5609 unsigned SplatBitSize;
5610 bool HasAnyUndefs;
5611 // Check for constant splats. Use ElemBitSize as the minimum element
5612 // width and reject splats that need wider elements.
5613 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
5614 ElemBitSize, true) &&
5615 SplatBitSize == ElemBitSize) {
5616 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
5617 DL, MVT::i32);
5618 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5619 }
5620 // Check for variable splats.
5621 BitVector UndefElements;
5622 SDValue Splat = BVN->getSplatValue(&UndefElements);
5623 if (Splat) {
5624 // Since i32 is the smallest legal type, we either need a no-op
5625 // or a truncation.
5626 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
5627 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5628 }
5629 }
5630
5631 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
5632 // and the shift amount is directly available in a GPR.
5633 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
5634 if (VSN->isSplat()) {
5635 SDValue VSNOp0 = VSN->getOperand(0);
5636 unsigned Index = VSN->getSplatIndex();
5637 assert(Index < VT.getVectorNumElements() &&
5638 "Splat index should be defined and in first operand");
5639 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5640 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
5641 // Since i32 is the smallest legal type, we either need a no-op
5642 // or a truncation.
5643 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
5644 VSNOp0.getOperand(Index));
5645 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5646 }
5647 }
5648 }
5649
5650 // Otherwise just treat the current form as legal.
5651 return Op;
5652}
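
One detail of the constant-splat path in lowerShift: the splat value is masked with 0xfff before becoming the BY_SCALAR shift amount, presumably so the constant stays within a 12-bit immediate range of the instruction forms (an assumption here, not stated in the code). A distilled sketch:

#include <cstdint>

// Illustrative only: the constant-splat fold. The 0xfff mask is assumed
// to match a 12-bit encodable range for the BY_SCALAR shift amount.
static uint32_t encodeByScalarAmount(uint64_t SplatBits) {
  return static_cast<uint32_t>(SplatBits & 0xfff);
}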
5653
5654SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
5655 SelectionDAG &DAG) const {
5656 SDLoc DL(Op);
5657 MVT ResultVT = Op.getSimpleValueType();
5658 SDValue Arg = Op.getOperand(0);
5659 auto CNode = cast<ConstantSDNode>(Op.getOperand(1));
5660 unsigned Check = CNode->getZExtValue();
5661
5662 unsigned TDCMask = 0;
5663 if (Check & fcSNan)
5664 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
5665 if (Check & fcQNan)
5666 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
5667 if (Check & fcPosInf)
5668 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
5669 if (Check & fcNegInf)
5670 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
5671 if (Check & fcPosNormal)
5672 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
5673 if (Check & fcNegNormal)
5674 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
5675 if (Check & fcPosSubnormal)
5676 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
5677 if (Check & fcNegSubnormal)
5678 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
5679 if (Check & fcPosZero)
5680 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
5681 if (Check & fcNegZero)
5682 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
5683 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
5684
5685 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
5686 return getCCResult(DAG, Intr);
5687}
5688
5689SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
5690 SelectionDAG &DAG) const {
5691 switch (Op.getOpcode()) {
5692 case ISD::FRAMEADDR:
5693 return lowerFRAMEADDR(Op, DAG);
5694 case ISD::RETURNADDR:
5695 return lowerRETURNADDR(Op, DAG);
5696 case ISD::BR_CC:
5697 return lowerBR_CC(Op, DAG);
5698 case ISD::SELECT_CC:
5699 return lowerSELECT_CC(Op, DAG);
5700 case ISD::SETCC:
5701 return lowerSETCC(Op, DAG);
5702 case ISD::STRICT_FSETCC:
5703 return lowerSTRICT_FSETCC(Op, DAG, false);
5704 case ISD::STRICT_FSETCCS:
5705 return lowerSTRICT_FSETCC(Op, DAG, true);
5706 case ISD::GlobalAddress:
5707 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
5708 case ISD::GlobalTLSAddress:
5709 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
5710 case ISD::BlockAddress:
5711 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
5712 case ISD::JumpTable:
5713 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
5714 case ISD::ConstantPool:
5715 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
5716 case ISD::BITCAST:
5717 return lowerBITCAST(Op, DAG);
5718 case ISD::VASTART:
5719 return lowerVASTART(Op, DAG);
5720 case ISD::VACOPY:
5721 return lowerVACOPY(Op, DAG);
5722 case ISD::DYNAMIC_STACKALLOC:
5723 return lowerDYNAMIC_STACKALLOC(Op, DAG);
5724 case ISD::GET_DYNAMIC_AREA_OFFSET:
5725 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
5726 case ISD::SMUL_LOHI:
5727 return lowerSMUL_LOHI(Op, DAG);
5728 case ISD::UMUL_LOHI:
5729 return lowerUMUL_LOHI(Op, DAG);
5730 case ISD::SDIVREM:
5731 return lowerSDIVREM(Op, DAG);
5732 case ISD::UDIVREM:
5733 return lowerUDIVREM(Op, DAG);
5734 case ISD::SADDO:
5735 case ISD::SSUBO:
5736 case ISD::UADDO:
5737 case ISD::USUBO:
5738 return lowerXALUO(Op, DAG);
5739 case ISD::ADDCARRY:
5740 case ISD::SUBCARRY:
5741 return lowerADDSUBCARRY(Op, DAG);
5742 case ISD::OR:
5743 return lowerOR(Op, DAG);
5744 case ISD::CTPOP:
5745 return lowerCTPOP(Op, DAG);
5746 case ISD::ATOMIC_FENCE:
5747 return lowerATOMIC_FENCE(Op, DAG);
5748 case ISD::ATOMIC_SWAP:
5749 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
5750 case ISD::ATOMIC_STORE:
5751 return lowerATOMIC_STORE(Op, DAG);
5752 case ISD::ATOMIC_LOAD:
5753 return lowerATOMIC_LOAD(Op, DAG);
5754 case ISD::ATOMIC_LOAD_ADD:
5755 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
5756 case ISD::ATOMIC_LOAD_SUB:
5757 return lowerATOMIC_LOAD_SUB(Op, DAG);
5758 case ISD::ATOMIC_LOAD_AND:
5759 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
5760 case ISD::ATOMIC_LOAD_OR:
5761 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
5762 case ISD::ATOMIC_LOAD_XOR:
5763 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
5764 case ISD::ATOMIC_LOAD_NAND:
5765 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
5766 case ISD::ATOMIC_LOAD_MIN:
5767 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
5768 case ISD::ATOMIC_LOAD_MAX:
5769 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
5770 case ISD::ATOMIC_LOAD_UMIN:
5771 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
5772 case ISD::ATOMIC_LOAD_UMAX:
5773 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
5774 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
5775 return lowerATOMIC_CMP_SWAP(Op, DAG);
5776 case ISD::STACKSAVE:
5777 return lowerSTACKSAVE(Op, DAG);
5778 case ISD::STACKRESTORE:
5779 return lowerSTACKRESTORE(Op, DAG);
5780 case ISD::PREFETCH:
5781 return lowerPREFETCH(Op, DAG);
5782 case ISD::INTRINSIC_W_CHAIN:
5783 return lowerINTRINSIC_W_CHAIN(Op, DAG);
5784 case ISD::INTRINSIC_WO_CHAIN:
5785 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
5786 case ISD::BUILD_VECTOR:
5787 return lowerBUILD_VECTOR(Op, DAG);
5788 case ISD::VECTOR_SHUFFLE:
5789 return lowerVECTOR_SHUFFLE(Op, DAG);
5790 case ISD::SCALAR_TO_VECTOR:
5791 return lowerSCALAR_TO_VECTOR(Op, DAG);
5792 case ISD::INSERT_VECTOR_ELT:
5793 return lowerINSERT_VECTOR_ELT(Op, DAG);
5794 case ISD::EXTRACT_VECTOR_ELT:
5795 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
5796 case ISD::SIGN_EXTEND_VECTOR_INREG:
5797 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
5798 case ISD::ZERO_EXTEND_VECTOR_INREG:
5799 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
5800 case ISD::SHL:
5801 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
5802 case ISD::SRL:
5803 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
5804 case ISD::SRA:
5805 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
5806 case ISD::IS_FPCLASS:
5807 return lowerIS_FPCLASS(Op, DAG);
5808 default:
5809 llvm_unreachable("Unexpected node to lower");
5810 }
5811}
5812
5813// Lower operations with invalid operand or result types (currently used
5814// only for 128-bit integer types).
5815void
5816SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
5817 SmallVectorImpl<SDValue> &Results,
5818 SelectionDAG &DAG) const {
5819 switch (N->getOpcode()) {
5820 case ISD::ATOMIC_LOAD: {
5821 SDLoc DL(N);
5822 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
5823 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
5824 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5825 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
5826 DL, Tys, Ops, MVT::i128, MMO);
5827 Results.push_back(lowerGR128ToI128(DAG, Res));
5828 Results.push_back(Res.getValue(1));
5829 break;
5830 }
5831 case ISD::ATOMIC_STORE: {
5832 SDLoc DL(N);
5833 SDVTList Tys = DAG.getVTList(MVT::Other);
5834 SDValue Ops[] = { N->getOperand(0),
5835 lowerI128ToGR128(DAG, N->getOperand(2)),
5836 N->getOperand(1) };
5837 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5838 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
5839 DL, Tys, Ops, MVT::i128, MMO);
5840 // We have to enforce sequential consistency by performing a
5841 // serialization operation after the store.
5842 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
5843 AtomicOrdering::SequentiallyConsistent)
5844 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
5845 MVT::Other, Res), 0);
5846 Results.push_back(Res);
5847 break;
5848 }
5849 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
5850 SDLoc DL(N);
5851 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
5852 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
5853 lowerI128ToGR128(DAG, N->getOperand(2)),
5854 lowerI128ToGR128(DAG, N->getOperand(3)) };
5855 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5856 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
5857 DL, Tys, Ops, MVT::i128, MMO);
5858 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
5859 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
5860 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
5861 Results.push_back(lowerGR128ToI128(DAG, Res));
5862 Results.push_back(Success);
5863 Results.push_back(Res.getValue(2));
5864 break;
5865 }
5866 case ISD::BITCAST: {
5867 SDValue Src = N->getOperand(0);
5868 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
5869 !useSoftFloat()) {
5870 SDLoc DL(N);
5871 SDValue Lo, Hi;
5872 if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) {
5873 SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src);
5874 Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
5875 DAG.getConstant(1, DL, MVT::i32));
5876 Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
5877 DAG.getConstant(0, DL, MVT::i32));
5878 } else {
5879 assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass &&
5880 "Unrecognized register class for f128.");
5881 SDValue LoFP = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
5882 DL, MVT::f64, Src);
5883 SDValue HiFP = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
5884 DL, MVT::f64, Src);
5885 Lo = DAG.getNode(ISD::BITCAST, DL, MVT::i64, LoFP);
5886 Hi = DAG.getNode(ISD::BITCAST, DL, MVT::i64, HiFP);
5887 }
5888 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi));
5889 }
5890 break;
5891 }
5892 default:
5893 llvm_unreachable("Unexpected node to lower");
5894 }
5895}
5896
5897void
5898SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
5899 SmallVectorImpl<SDValue> &Results,
5900 SelectionDAG &DAG) const {
5901 return LowerOperationWrapper(N, Results, DAG);
5902}
5903
5904const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
5905#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
5906 switch ((SystemZISD::NodeType)Opcode) {
5907 case SystemZISD::FIRST_NUMBER: break;
5908 OPCODE(RET_FLAG);
5909 OPCODE(CALL);
5910 OPCODE(SIBCALL);
5911 OPCODE(TLS_GDCALL);
5912 OPCODE(TLS_LDCALL);
5913 OPCODE(PCREL_WRAPPER);
5914 OPCODE(PCREL_OFFSET);
5915 OPCODE(ICMP);
5916 OPCODE(FCMP);
5917 OPCODE(STRICT_FCMP);
5918 OPCODE(STRICT_FCMPS);
5919 OPCODE(TM);
5920 OPCODE(BR_CCMASK);
5921 OPCODE(SELECT_CCMASK);
5922 OPCODE(ADJDYNALLOC);
5923 OPCODE(PROBED_ALLOCA);
5924 OPCODE(POPCNT);
5925 OPCODE(SMUL_LOHI);
5926 OPCODE(UMUL_LOHI);
5927 OPCODE(SDIVREM);
5928 OPCODE(UDIVREM);
5929 OPCODE(SADDO);
5930 OPCODE(SSUBO);
5931 OPCODE(UADDO);
5932 OPCODE(USUBO);
5933 OPCODE(ADDCARRY);
5934 OPCODE(SUBCARRY);
5935 OPCODE(GET_CCMASK);
5936 OPCODE(MVC);
5937 OPCODE(NC);
5938 OPCODE(OC);
5939 OPCODE(XC);
5940 OPCODE(CLC);
5941 OPCODE(MEMSET_MVC);
5942 OPCODE(STPCPY);
5943 OPCODE(STRCMP);
5944 OPCODE(SEARCH_STRING);
5945 OPCODE(IPM);
5946 OPCODE(MEMBARRIER);
5947 OPCODE(TBEGIN);
5948 OPCODE(TBEGIN_NOFLOAT);
5949 OPCODE(TEND);
5950 OPCODE(BYTE_MASK);
5951 OPCODE(ROTATE_MASK);
5952 OPCODE(REPLICATE);
5953 OPCODE(JOIN_DWORDS);
5954 OPCODE(SPLAT);
5955 OPCODE(MERGE_HIGH);
5956 OPCODE(MERGE_LOW);
5957 OPCODE(SHL_DOUBLE);
5958 OPCODE(PERMUTE_DWORDS);
5959 OPCODE(PERMUTE);
5960 OPCODE(PACK);
5961 OPCODE(PACKS_CC);
5962 OPCODE(PACKLS_CC);
5963 OPCODE(UNPACK_HIGH);
5964 OPCODE(UNPACKL_HIGH);
5965 OPCODE(UNPACK_LOW);
5966 OPCODE(UNPACKL_LOW);
5967 OPCODE(VSHL_BY_SCALAR);
5968 OPCODE(VSRL_BY_SCALAR);
5969 OPCODE(VSRA_BY_SCALAR);
5970 OPCODE(VSUM);
5971 OPCODE(VICMPE);
5972 OPCODE(VICMPH);
5973 OPCODE(VICMPHL);
5974 OPCODE(VICMPES);
5975 OPCODE(VICMPHS);
5976 OPCODE(VICMPHLS);
5977 OPCODE(VFCMPE);
5978 OPCODE(STRICT_VFCMPE);
5979 OPCODE(STRICT_VFCMPES);
5980 OPCODE(VFCMPH);
5981 OPCODE(STRICT_VFCMPH);
5982 OPCODE(STRICT_VFCMPHS);
5983 OPCODE(VFCMPHE);
5984 OPCODE(STRICT_VFCMPHE);
5985 OPCODE(STRICT_VFCMPHES);
5986 OPCODE(VFCMPES);
5987 OPCODE(VFCMPHS);
5988 OPCODE(VFCMPHES);
5989 OPCODE(VFTCI);
5990 OPCODE(VEXTEND);
5991 OPCODE(STRICT_VEXTEND);
5992 OPCODE(VROUND);
5993 OPCODE(STRICT_VROUND);
5994 OPCODE(VTM);
5995 OPCODE(VFAE_CC);
5996 OPCODE(VFAEZ_CC);
5997 OPCODE(VFEE_CC);