Bug Summary

File: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
Warning: line 780, column 36
The result of the left shift is undefined due to shifting by '18446744073709551615', which is greater or equal to the width of type 'uint64_t'
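In this snapshot's llvm/include/llvm/Support/MathExtras.h, findFirstSet defaults to ZB_Max zero behavior, so a zero input yields std::numeric_limits<uint64_t>::max(); using that value as a shift count is undefined behavior in C++. A minimal standalone sketch of the hazard (it models that zero-input behavior and is not part of the reported file):

  #include <cstdint>
  #include <limits>

  // Model of llvm::findFirstSet's default ZB_Max behavior: a zero input
  // yields std::numeric_limits<uint64_t>::max(), i.e. the
  // '18446744073709551615' quoted in the warning above.
  static uint64_t findFirstSetModel(uint64_t Val) {
    if (Val == 0)
      return std::numeric_limits<uint64_t>::max();
    uint64_t Index = 0;
    while ((Val & 1) == 0) {
      Val >>= 1;
      ++Index;
    }
    return Index;
  }

  int main() {
    uint64_t SplatBitsZ = 0;      // the case the analyzer explores
    uint64_t SplatUndefZ = ~0ULL;
    // Shift counts >= 64 are undefined for uint64_t, so this mirrors the
    // expression flagged at line 780.
    uint64_t Lower =
        SplatUndefZ & ((uint64_t(1) << findFirstSetModel(SplatBitsZ)) - 1);
    return (int)(Lower & 1);
  }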

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name SystemZISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/SystemZ -I /build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/llvm/lib/Target/SystemZ -I include -I /build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/= -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 
-o /tmp/scan-build-2022-01-26-233846-219801-1 -x c++ /build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

/build/llvm-toolchain-snapshot-14~++20220126101029+f487a76430a0/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
15#include "SystemZConstantPoolValue.h"
16#include "SystemZMachineFunctionInfo.h"
17#include "SystemZTargetMachine.h"
18#include "llvm/CodeGen/CallingConvLower.h"
19#include "llvm/CodeGen/MachineInstrBuilder.h"
20#include "llvm/CodeGen/MachineRegisterInfo.h"
21#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
22#include "llvm/IR/IntrinsicInst.h"
23#include "llvm/IR/Intrinsics.h"
24#include "llvm/IR/IntrinsicsS390.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/KnownBits.h"
27#include <cctype>
28
29using namespace llvm;
30
31#define DEBUG_TYPE "systemz-lower"
32
33namespace {
34// Represents information about a comparison.
35struct Comparison {
36 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
37 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
38 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
39
40 // The operands to the comparison.
41 SDValue Op0, Op1;
42
43 // Chain if this is a strict floating-point comparison.
44 SDValue Chain;
45
46 // The opcode that should be used to compare Op0 and Op1.
47 unsigned Opcode;
48
49 // A SystemZICMP value. Only used for integer comparisons.
50 unsigned ICmpType;
51
52 // The mask of CC values that Opcode can produce.
53 unsigned CCValid;
54
55 // The mask of CC values for which the original condition is true.
56 unsigned CCMask;
57};
58} // end anonymous namespace
59
60// Classify VT as either 32 or 64 bit.
61static bool is32Bit(EVT VT) {
62 switch (VT.getSimpleVT().SimpleTy) {
63 case MVT::i32:
64 return true;
65 case MVT::i64:
66 return false;
67 default:
68 llvm_unreachable("Unsupported type");
69 }
70}
71
72// Return a version of MachineOperand that can be safely used before the
73// final use.
74static MachineOperand earlyUseOperand(MachineOperand Op) {
75 if (Op.isReg())
76 Op.setIsKill(false);
77 return Op;
78}
79
80SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
81 const SystemZSubtarget &STI)
82 : TargetLowering(TM), Subtarget(STI) {
83 MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
84
85 auto *Regs = STI.getSpecialRegisters();
86
87 // Set up the register classes.
88 if (Subtarget.hasHighWord())
89 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
90 else
91 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
92 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
93 if (!useSoftFloat()) {
94 if (Subtarget.hasVector()) {
95 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
96 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
97 } else {
98 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
99 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
100 }
101 if (Subtarget.hasVectorEnhancements1())
102 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
103 else
104 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
105
106 if (Subtarget.hasVector()) {
107 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
108 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
109 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
110 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
111 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
112 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
113 }
114 }
115
116 // Compute derived properties from the register classes
117 computeRegisterProperties(Subtarget.getRegisterInfo());
118
119 // Set up special registers.
120 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
121
122 // TODO: It may be better to default to latency-oriented scheduling, however
123 // LLVM's current latency-oriented scheduler can't handle physreg definitions
124 // such as SystemZ has with CC, so set this to the register-pressure
125 // scheduler, because it can.
126 setSchedulingPreference(Sched::RegPressure);
127
128 setBooleanContents(ZeroOrOneBooleanContent);
129 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
130
131 // Instructions are strings of 2-byte aligned 2-byte values.
132 setMinFunctionAlignment(Align(2));
133 // For performance reasons we prefer 16-byte alignment.
134 setPrefFunctionAlignment(Align(16));
135
136 // Handle operations that are handled in a similar way for all types.
137 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
138 I <= MVT::LAST_FP_VALUETYPE;
139 ++I) {
140 MVT VT = MVT::SimpleValueType(I);
141 if (isTypeLegal(VT)) {
142 // Lower SET_CC into an IPM-based sequence.
143 setOperationAction(ISD::SETCC, VT, Custom);
144 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
145 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
146
147 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
148 setOperationAction(ISD::SELECT, VT, Expand);
149
150 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
151 setOperationAction(ISD::SELECT_CC, VT, Custom);
152 setOperationAction(ISD::BR_CC, VT, Custom);
153 }
154 }
155
156 // Expand jump table branches as address arithmetic followed by an
157 // indirect jump.
158 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
159
160 // Expand BRCOND into a BR_CC (see above).
161 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
162
163 // Handle integer types.
164 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
165 I <= MVT::LAST_INTEGER_VALUETYPE;
166 ++I) {
167 MVT VT = MVT::SimpleValueType(I);
168 if (isTypeLegal(VT)) {
169 setOperationAction(ISD::ABS, VT, Legal);
170
171 // Expand individual DIV and REMs into DIVREMs.
172 setOperationAction(ISD::SDIV, VT, Expand);
173 setOperationAction(ISD::UDIV, VT, Expand);
174 setOperationAction(ISD::SREM, VT, Expand);
175 setOperationAction(ISD::UREM, VT, Expand);
176 setOperationAction(ISD::SDIVREM, VT, Custom);
177 setOperationAction(ISD::UDIVREM, VT, Custom);
178
179 // Support addition/subtraction with overflow.
180 setOperationAction(ISD::SADDO, VT, Custom);
181 setOperationAction(ISD::SSUBO, VT, Custom);
182
183 // Support addition/subtraction with carry.
184 setOperationAction(ISD::UADDO, VT, Custom);
185 setOperationAction(ISD::USUBO, VT, Custom);
186
187 // Support carry in as value rather than glue.
188 setOperationAction(ISD::ADDCARRY, VT, Custom);
189 setOperationAction(ISD::SUBCARRY, VT, Custom);
190
191 // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
192 // stores, putting a serialization instruction after the stores.
193 setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
194 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
195
196 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
197 // available, or if the operand is constant.
198 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
199
200 // Use POPCNT on z196 and above.
201 if (Subtarget.hasPopulationCount())
202 setOperationAction(ISD::CTPOP, VT, Custom);
203 else
204 setOperationAction(ISD::CTPOP, VT, Expand);
205
206 // No special instructions for these.
207 setOperationAction(ISD::CTTZ, VT, Expand);
208 setOperationAction(ISD::ROTR, VT, Expand);
209
210 // Use *MUL_LOHI where possible instead of MULH*.
211 setOperationAction(ISD::MULHS, VT, Expand);
212 setOperationAction(ISD::MULHU, VT, Expand);
213 setOperationAction(ISD::SMUL_LOHI, VT, Custom);
214 setOperationAction(ISD::UMUL_LOHI, VT, Custom);
215
216 // Only z196 and above have native support for conversions to unsigned.
217 // On z10, promoting to i64 doesn't generate an inexact condition for
218 // values that are outside the i32 range but in the i64 range, so use
219 // the default expansion.
220 if (!Subtarget.hasFPExtension())
221 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
222
223 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
224 // default to Expand, so need to be modified to Legal where appropriate.
225 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
226 if (Subtarget.hasFPExtension())
227 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);
228
229 // And similarly for STRICT_[SU]INT_TO_FP.
230 setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal);
231 if (Subtarget.hasFPExtension())
232 setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal);
233 }
234 }
235
236 // Type legalization will convert 8- and 16-bit atomic operations into
237 // forms that operate on i32s (but still keeping the original memory VT).
238 // Lower them into full i32 operations.
239 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
240 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
241 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
242 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
243 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
244 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
245 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
246 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
247 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
248 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
249 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
250
251 // Even though i128 is not a legal type, we still need to custom lower
252 // the atomic operations in order to exploit SystemZ instructions.
253 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
254 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
255
256 // We can use the CC result of compare-and-swap to implement
257 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
258 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
259 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
260 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
261
262 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
263
264 // Traps are legal, as we will convert them to "j .+2".
265 setOperationAction(ISD::TRAP, MVT::Other, Legal);
266
267 // z10 has instructions for signed but not unsigned FP conversion.
268 // Handle unsigned 32-bit types as signed 64-bit types.
269 if (!Subtarget.hasFPExtension()) {
270 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
271 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
272 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote);
273 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
274 }
275
276 // We have native support for a 64-bit CTLZ, via FLOGR.
277 setOperationAction(ISD::CTLZ, MVT::i32, Promote);
278 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
279 setOperationAction(ISD::CTLZ, MVT::i64, Legal);
280
281 // On z15 we have native support for a 64-bit CTPOP.
282 if (Subtarget.hasMiscellaneousExtensions3()) {
283 setOperationAction(ISD::CTPOP, MVT::i32, Promote);
284 setOperationAction(ISD::CTPOP, MVT::i64, Legal);
285 }
286
287 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
288 setOperationAction(ISD::OR, MVT::i64, Custom);
289
290 // Expand 128 bit shifts without using a libcall.
291 setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
292 setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
293 setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
294 setLibcallName(RTLIB::SRL_I128, nullptr);
295 setLibcallName(RTLIB::SHL_I128, nullptr);
296 setLibcallName(RTLIB::SRA_I128, nullptr);
297
298 // Handle bitcast from fp128 to i128.
299 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
300
301 // We have native instructions for i8, i16 and i32 extensions, but not i1.
302 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
303 for (MVT VT : MVT::integer_valuetypes()) {
304 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
305 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
306 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
307 }
308
309 // Handle the various types of symbolic address.
310 setOperationAction(ISD::ConstantPool, PtrVT, Custom);
311 setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
312 setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
313 setOperationAction(ISD::BlockAddress, PtrVT, Custom);
314 setOperationAction(ISD::JumpTable, PtrVT, Custom);
315
316 // We need to handle dynamic allocations specially because of the
317 // 160-byte area at the bottom of the stack.
318 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
319 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
320
321 setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
322 setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
323
324 // Handle prefetches with PFD or PFDRL.
325 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
326
327 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
328 // Assume by default that all vector operations need to be expanded.
329 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
330 if (getOperationAction(Opcode, VT) == Legal)
331 setOperationAction(Opcode, VT, Expand);
332
333 // Likewise all truncating stores and extending loads.
334 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
335 setTruncStoreAction(VT, InnerVT, Expand);
336 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
337 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
338 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
339 }
340
341 if (isTypeLegal(VT)) {
342 // These operations are legal for anything that can be stored in a
343 // vector register, even if there is no native support for the format
344 // as such. In particular, we can do these for v4f32 even though there
345 // are no specific instructions for that format.
346 setOperationAction(ISD::LOAD, VT, Legal);
347 setOperationAction(ISD::STORE, VT, Legal);
348 setOperationAction(ISD::VSELECT, VT, Legal);
349 setOperationAction(ISD::BITCAST, VT, Legal);
350 setOperationAction(ISD::UNDEF, VT, Legal);
351
352 // Likewise, except that we need to replace the nodes with something
353 // more specific.
354 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
355 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
356 }
357 }
358
359 // Handle integer vector types.
360 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
361 if (isTypeLegal(VT)) {
362 // These operations have direct equivalents.
363 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
364 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
365 setOperationAction(ISD::ADD, VT, Legal);
366 setOperationAction(ISD::SUB, VT, Legal);
367 if (VT != MVT::v2i64)
368 setOperationAction(ISD::MUL, VT, Legal);
369 setOperationAction(ISD::ABS, VT, Legal);
370 setOperationAction(ISD::AND, VT, Legal);
371 setOperationAction(ISD::OR, VT, Legal);
372 setOperationAction(ISD::XOR, VT, Legal);
373 if (Subtarget.hasVectorEnhancements1())
374 setOperationAction(ISD::CTPOP, VT, Legal);
375 else
376 setOperationAction(ISD::CTPOP, VT, Custom);
377 setOperationAction(ISD::CTTZ, VT, Legal);
378 setOperationAction(ISD::CTLZ, VT, Legal);
379
380 // Convert a GPR scalar to a vector by inserting it into element 0.
381 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
382
383 // Use a series of unpacks for extensions.
384 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
385 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
386
387 // Detect shifts by a scalar amount and convert them into
388 // V*_BY_SCALAR.
389 setOperationAction(ISD::SHL, VT, Custom);
390 setOperationAction(ISD::SRA, VT, Custom);
391 setOperationAction(ISD::SRL, VT, Custom);
392
393 // At present ROTL isn't matched by DAGCombiner. ROTR should be
394 // converted into ROTL.
395 setOperationAction(ISD::ROTL, VT, Expand);
396 setOperationAction(ISD::ROTR, VT, Expand);
397
398 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
399 // and inverting the result as necessary.
400 setOperationAction(ISD::SETCC, VT, Custom);
401 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
402 if (Subtarget.hasVectorEnhancements1())
403 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
404 }
405 }
406
407 if (Subtarget.hasVector()) {
408 // There should be no need to check for float types other than v2f64
409 // since <2 x f32> isn't a legal type.
410 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
411 setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
412 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
413 setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
414 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
415 setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
416 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
417 setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
418
419 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
420 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
421 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
422 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
423 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
424 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal);
425 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
426 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal);
427 }
428
429 if (Subtarget.hasVectorEnhancements2()) {
430 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
431 setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
432 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
433 setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
434 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
435 setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
436 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
437 setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);
438
439 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
440 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
441 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
442 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
443 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
444 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal);
445 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
446 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal);
447 }
448
449 // Handle floating-point types.
450 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
451 I <= MVT::LAST_FP_VALUETYPE;
452 ++I) {
453 MVT VT = MVT::SimpleValueType(I);
454 if (isTypeLegal(VT)) {
455 // We can use FI for FRINT.
456 setOperationAction(ISD::FRINT, VT, Legal);
457
458 // We can use the extended form of FI for other rounding operations.
459 if (Subtarget.hasFPExtension()) {
460 setOperationAction(ISD::FNEARBYINT, VT, Legal);
461 setOperationAction(ISD::FFLOOR, VT, Legal);
462 setOperationAction(ISD::FCEIL, VT, Legal);
463 setOperationAction(ISD::FTRUNC, VT, Legal);
464 setOperationAction(ISD::FROUND, VT, Legal);
465 }
466
467 // No special instructions for these.
468 setOperationAction(ISD::FSIN, VT, Expand);
469 setOperationAction(ISD::FCOS, VT, Expand);
470 setOperationAction(ISD::FSINCOS, VT, Expand);
471 setOperationAction(ISD::FREM, VT, Expand);
472 setOperationAction(ISD::FPOW, VT, Expand);
473
474 // Handle constrained floating-point operations.
475 setOperationAction(ISD::STRICT_FADD, VT, Legal);
476 setOperationAction(ISD::STRICT_FSUB, VT, Legal);
477 setOperationAction(ISD::STRICT_FMUL, VT, Legal);
478 setOperationAction(ISD::STRICT_FDIV, VT, Legal);
479 setOperationAction(ISD::STRICT_FMA, VT, Legal);
480 setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
481 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
482 setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
483 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
484 if (Subtarget.hasFPExtension()) {
485 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
486 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
487 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
488 setOperationAction(ISD::STRICT_FROUND, VT, Legal);
489 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
490 }
491 }
492 }
493
494 // Handle floating-point vector types.
495 if (Subtarget.hasVector()) {
496 // Scalar-to-vector conversion is just a subreg.
497 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
498 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
499
500 // Some insertions and extractions can be done directly but others
501 // need to go via integers.
502 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
503 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
504 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
505 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
506
507 // These operations have direct equivalents.
508 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
509 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
510 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
511 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
512 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
513 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
514 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
515 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
516 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
517 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
518 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
519 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
520 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
521 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
522
523 // Handle constrained floating-point operations.
524 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
525 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
526 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
527 setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
528 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
529 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
530 setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
531 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
532 setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
533 setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
534 setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
535 setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
536 }
537
538 // The vector enhancements facility 1 has instructions for these.
539 if (Subtarget.hasVectorEnhancements1()) {
540 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
541 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
542 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
543 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
544 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
545 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
546 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
547 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
548 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
549 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
550 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
551 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
552 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
553 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
554
555 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
556 setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
557 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
558 setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);
559
560 setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
561 setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
562 setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
563 setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);
564
565 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
566 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
567 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
568 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
569
570 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
571 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
572 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
573 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
574
575 setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
576 setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
577 setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
578 setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
579
580 // Handle constrained floating-point operations.
581 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
582 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
583 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
584 setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
585 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
586 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
587 setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
588 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
589 setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
590 setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
591 setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
592 setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
593 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
594 MVT::v4f32, MVT::v2f64 }) {
595 setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
596 setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
597 setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal);
598 setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal);
599 }
600 }
601
602 // We only have fused f128 multiply-addition on vector registers.
603 if (!Subtarget.hasVectorEnhancements1()) {
604 setOperationAction(ISD::FMA, MVT::f128, Expand);
605 setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
606 }
607
608 // We don't have a copysign instruction on vector registers.
609 if (Subtarget.hasVectorEnhancements1())
610 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
611
612 // Needed so that we don't try to implement f128 constant loads using
613 // a load-and-extend of an f80 constant (in cases where the constant
614 // would fit in an f80).
615 for (MVT VT : MVT::fp_valuetypes())
616 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
617
618 // We don't have extending load instructions on vector registers.
619 if (Subtarget.hasVectorEnhancements1()) {
620 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
621 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
622 }
623
624 // Floating-point truncation and stores need to be done separately.
625 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
626 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
627 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
628
629 // We have 64-bit FPR<->GPR moves, but need special handling for
630 // 32-bit forms.
631 if (!Subtarget.hasVector()) {
632 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
633 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
634 }
635
636 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
637 // structure, but VAEND is a no-op.
638 setOperationAction(ISD::VASTART, MVT::Other, Custom);
639 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
640 setOperationAction(ISD::VAEND, MVT::Other, Expand);
641
642 // Codes for which we want to perform some z-specific combinations.
643 setTargetDAGCombine(ISD::ZERO_EXTEND);
644 setTargetDAGCombine(ISD::SIGN_EXTEND);
645 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
646 setTargetDAGCombine(ISD::LOAD);
647 setTargetDAGCombine(ISD::STORE);
648 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
649 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
650 setTargetDAGCombine(ISD::FP_ROUND);
651 setTargetDAGCombine(ISD::STRICT_FP_ROUND);
652 setTargetDAGCombine(ISD::FP_EXTEND);
653 setTargetDAGCombine(ISD::SINT_TO_FP);
654 setTargetDAGCombine(ISD::UINT_TO_FP);
655 setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
656 setTargetDAGCombine(ISD::BSWAP);
657 setTargetDAGCombine(ISD::SDIV);
658 setTargetDAGCombine(ISD::UDIV);
659 setTargetDAGCombine(ISD::SREM);
660 setTargetDAGCombine(ISD::UREM);
661 setTargetDAGCombine(ISD::INTRINSIC_VOID);
662 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
663
664 // Handle intrinsics.
665 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
666 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
667
668 // We want to use MVC in preference to even a single load/store pair.
669 MaxStoresPerMemcpy = 0;
670 MaxStoresPerMemcpyOptSize = 0;
671
672 // The main memset sequence is a byte store followed by an MVC.
673 // Two STC or MV..I stores win over that, but the kind of fused stores
674 // generated by target-independent code don't when the byte value is
675 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
676 // than "STC;MVC". Handle the choice in target-specific code instead.
677 MaxStoresPerMemset = 0;
678 MaxStoresPerMemsetOptSize = 0;
679
680 // Default to having -disable-strictnode-mutation on
681 IsStrictFPEnabled = true;
682}
683
684bool SystemZTargetLowering::useSoftFloat() const {
685 return Subtarget.hasSoftFloat();
686}
687
688EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
689 LLVMContext &, EVT VT) const {
690 if (!VT.isVector())
691 return MVT::i32;
692 return VT.changeVectorElementTypeToInteger();
693}
694
695bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
696 const MachineFunction &MF, EVT VT) const {
697 VT = VT.getScalarType();
698
699 if (!VT.isSimple())
700 return false;
701
702 switch (VT.getSimpleVT().SimpleTy) {
703 case MVT::f32:
704 case MVT::f64:
705 return true;
706 case MVT::f128:
707 return Subtarget.hasVectorEnhancements1();
708 default:
709 break;
710 }
711
712 return false;
713}
714
715// Return true if the constant can be generated with a vector instruction,
716// such as VGM, VGMB or VREPI.
717bool SystemZVectorConstantInfo::isVectorConstantLegal(
718 const SystemZSubtarget &Subtarget) {
719 const SystemZInstrInfo *TII =
720 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
721 if (!Subtarget.hasVector() ||
    3: Assuming the condition is false
722 (isFP128 && !Subtarget.hasVectorEnhancements1()))
    4: Assuming field 'isFP128' is false
723 return false;
724
725 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
726 // preferred way of creating all-zero and all-one vectors so give it
727 // priority over other methods below.
728 unsigned Mask = 0;
729 unsigned I = 0;
730 for (; I < SystemZ::VectorBytes; ++I) {
    5: Loop condition is true. Entering loop body
731 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
732 if (Byte == 0xff)
    6: Assuming 'Byte' is not equal to 255
    7: Taking false branch
733 Mask |= 1ULL << I;
734 else if (Byte != 0)
    8: Assuming 'Byte' is not equal to 0
    9: Taking true branch
735 break;
736 }
737 if (I == SystemZ::VectorBytes) {
    10: Execution continues on line 737
    10.1: 'I' is not equal to 'VectorBytes'
    11: Taking false branch
738 Opcode = SystemZISD::BYTE_MASK;
739 OpVals.push_back(Mask);
740 VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
741 return true;
742 }
743
744 if (SplatBitSize > 64)
    11.1: Field 'SplatBitSize' is <= 64
    12: Taking false branch
745 return false;
746
747 auto tryValue = [&](uint64_t Value) -> bool {
748 // Try VECTOR REPLICATE IMMEDIATE
749 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
750 if (isInt<16>(SignedValue)) {
751 OpVals.push_back(((unsigned) SignedValue));
752 Opcode = SystemZISD::REPLICATE;
753 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
754 SystemZ::VectorBits / SplatBitSize);
755 return true;
756 }
757 // Try VECTOR GENERATE MASK
758 unsigned Start, End;
759 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
760 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
761 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
762 // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
763 OpVals.push_back(Start - (64 - SplatBitSize));
764 OpVals.push_back(End - (64 - SplatBitSize));
765 Opcode = SystemZISD::ROTATE_MASK;
766 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
767 SystemZ::VectorBits / SplatBitSize);
768 return true;
769 }
770 return false;
771 };
772
773 // First try assuming that any undefined bits above the highest set bit
774 // and below the lowest set bit are 1s. This increases the likelihood of
775 // being able to use a sign-extended element value in VECTOR REPLICATE
776 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
777 uint64_t SplatBitsZ = SplatBits.getZExtValue();
778 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
779 uint64_t Lower =
780 (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
    13: Calling 'findFirstSet<unsigned long>'
    20: Returning from 'findFirstSet<unsigned long>'
    21: The result of the left shift is undefined due to shifting by '18446744073709551615', which is greater or equal to the width of type 'uint64_t'
781 uint64_t Upper =
782 (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
783 if (tryValue(SplatBitsZ | Upper | Lower))
784 return true;
785
786 // Now try assuming that any undefined bits between the first and
787 // last defined set bits are set. This increases the chances of
788 // using a non-wraparound mask.
789 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
790 return tryValue(SplatBitsZ | Middle);
791}
792
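Annotation (not part of the reported file): one way to avoid the undefined shift is to skip the Lower/Upper masks when SplatBitsZ has no set bits at all, since findFirstSet/findLastSet are only meaningful for nonzero inputs. A hedged sketch of such a guard, using a hypothetical helper:

  #include <cstdint>

  // Hypothetical guarded helper (a sketch, not the upstream fix): computes
  // SplatUndefZ & ((1 << findFirstSet(SplatBitsZ)) - 1) without ever using
  // a shift count >= 64.
  static uint64_t lowerUndefMask(uint64_t SplatBitsZ, uint64_t SplatUndefZ) {
    if (SplatBitsZ == 0)
      return 0; // no set bit, so no undefined bits "below" the lowest one
    unsigned FirstSet = 0;
    while (((SplatBitsZ >> FirstSet) & 1) == 0) // findFirstSet, nonzero input
      ++FirstSet;
    return SplatUndefZ & ((uint64_t(1) << FirstSet) - 1); // FirstSet < 64
  }
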
793SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
794 IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
795 isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
796 SplatBits = FPImm.bitcastToAPInt();
797 unsigned Width = SplatBits.getBitWidth();
798 IntBits <<= (SystemZ::VectorBits - Width);
799
800 // Find the smallest splat.
801 while (Width > 8) {
802 unsigned HalfSize = Width / 2;
803 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
804 APInt LowValue = SplatBits.trunc(HalfSize);
805
806 // If the two halves do not match, stop here.
807 if (HighValue != LowValue || 8 > HalfSize)
808 break;
809
810 SplatBits = HighValue;
811 Width = HalfSize;
812 }
813 SplatUndef = 0;
814 SplatBitSize = Width;
815}
816
817SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
818 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
819 bool HasAnyUndefs;
820
821 // Get IntBits by finding the 128 bit splat.
822 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
823 true);
824
825 // Get SplatBits by finding the 8 bit or greater splat.
826 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
827 true);
828}
829
830bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
831 bool ForCodeSize) const {
832 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
833 if (Imm.isZero() || Imm.isNegZero())
    1: Taking false branch
834 return true;
835
836 return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
    2: Calling 'SystemZVectorConstantInfo::isVectorConstantLegal'
837}
838
839/// Returns true if stack probing through inline assembly is requested.
840bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
841 // If the function specifically requests inline stack probes, emit them.
842 if (MF.getFunction().hasFnAttribute("probe-stack"))
843 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
844 "inline-asm";
845 return false;
846}
847
848bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
849 // We can use CGFI or CLGFI.
850 return isInt<32>(Imm) || isUInt<32>(Imm);
851}
852
853bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
854 // We can use ALGFI or SLGFI.
855 return isUInt<32>(Imm) || isUInt<32>(-Imm);
856}
857
858bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
859 EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {
860 // Unaligned accesses should never be slower than the expanded version.
861 // We check specifically for aligned accesses in the few cases where
862 // they are required.
863 if (Fast)
864 *Fast = true;
865 return true;
866}
867
868// Information about the addressing mode for a memory access.
869struct AddressingMode {
870 // True if a long displacement is supported.
871 bool LongDisplacement;
872
873 // True if use of index register is supported.
874 bool IndexReg;
875
876 AddressingMode(bool LongDispl, bool IdxReg) :
877 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
878};
879
880// Return the desired addressing mode for a Load which has only one use (in
881// the same block) which is a Store.
882static AddressingMode getLoadStoreAddrMode(bool HasVector,
883 Type *Ty) {
884 // With vector support a Load->Store combination may be combined to either
885 // an MVC or vector operations and it seems to work best to allow the
886 // vector addressing mode.
887 if (HasVector)
888 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
889
890 // Otherwise only the MVC case is special.
891 bool MVC = Ty->isIntegerTy(8);
892 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
893}
894
895// Return the addressing mode which seems most desirable given an LLVM
896// Instruction pointer.
897static AddressingMode
898supportedAddressingMode(Instruction *I, bool HasVector) {
899 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
900 switch (II->getIntrinsicID()) {
901 default: break;
902 case Intrinsic::memset:
903 case Intrinsic::memmove:
904 case Intrinsic::memcpy:
905 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
906 }
907 }
908
909 if (isa<LoadInst>(I) && I->hasOneUse()) {
910 auto *SingleUser = cast<Instruction>(*I->user_begin());
911 if (SingleUser->getParent() == I->getParent()) {
912 if (isa<ICmpInst>(SingleUser)) {
913 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
914 if (C->getBitWidth() <= 64 &&
915 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
916 // Comparison of memory with 16 bit signed / unsigned immediate
917 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
918 } else if (isa<StoreInst>(SingleUser))
919 // Load->Store
920 return getLoadStoreAddrMode(HasVector, I->getType());
921 }
922 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
923 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
924 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
925 // Load->Store
926 return getLoadStoreAddrMode(HasVector, LoadI->getType());
927 }
928
929 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
930
931 // * Use LDE instead of LE/LEY for z13 to avoid partial register
932 // dependencies (LDE only supports small offsets).
933 // * Utilize the vector registers to hold floating point
934 // values (vector load / store instructions only support small
935 // offsets).
936
937 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
938 I->getOperand(0)->getType());
939 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
940 bool IsVectorAccess = MemAccessTy->isVectorTy();
941
942 // A store of an extracted vector element will be combined into a VSTE type
943 // instruction.
944 if (!IsVectorAccess && isa<StoreInst>(I)) {
945 Value *DataOp = I->getOperand(0);
946 if (isa<ExtractElementInst>(DataOp))
947 IsVectorAccess = true;
948 }
949
950 // A load which gets inserted into a vector element will be combined into a
951 // VLE type instruction.
952 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
953 User *LoadUser = *I->user_begin();
954 if (isa<InsertElementInst>(LoadUser))
955 IsVectorAccess = true;
956 }
957
958 if (IsFPAccess || IsVectorAccess)
959 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
960 }
961
962 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
963}
964
965bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
966 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
967 // Punt on globals for now, although they can be used in limited
968 // RELATIVE LONG cases.
969 if (AM.BaseGV)
970 return false;
971
972 // Require a 20-bit signed offset.
973 if (!isInt<20>(AM.BaseOffs))
974 return false;
975
976 AddressingMode SupportedAM(true, true);
977 if (I != nullptr)
978 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
979
980 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
981 return false;
982
983 if (!SupportedAM.IndexReg)
984 // No indexing allowed.
985 return AM.Scale == 0;
986 else
987 // Indexing is OK but no scale factor can be applied.
988 return AM.Scale == 0 || AM.Scale == 1;
989}
990
991bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
992 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
993 return false;
994 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedSize();
995 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedSize();
996 return FromBits > ToBits;
997}
998
999bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
1000 if (!FromVT.isInteger() || !ToVT.isInteger())
1001 return false;
1002 unsigned FromBits = FromVT.getFixedSizeInBits();
1003 unsigned ToBits = ToVT.getFixedSizeInBits();
1004 return FromBits > ToBits;
1005}
1006
1007//===----------------------------------------------------------------------===//
1008// Inline asm support
1009//===----------------------------------------------------------------------===//
1010
1011TargetLowering::ConstraintType
1012SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
1013 if (Constraint.size() == 1) {
1014 switch (Constraint[0]) {
1015 case 'a': // Address register
1016 case 'd': // Data register (equivalent to 'r')
1017 case 'f': // Floating-point register
1018 case 'h': // High-part register
1019 case 'r': // General-purpose register
1020 case 'v': // Vector register
1021 return C_RegisterClass;
1022
1023 case 'Q': // Memory with base and unsigned 12-bit displacement
1024 case 'R': // Likewise, plus an index
1025 case 'S': // Memory with base and signed 20-bit displacement
1026 case 'T': // Likewise, plus an index
1027 case 'm': // Equivalent to 'T'.
1028 return C_Memory;
1029
1030 case 'I': // Unsigned 8-bit constant
1031 case 'J': // Unsigned 12-bit constant
1032 case 'K': // Signed 16-bit constant
1033 case 'L': // Signed 20-bit displacement (on all targets we support)
1034 case 'M': // 0x7fffffff
1035 return C_Immediate;
1036
1037 default:
1038 break;
1039 }
1040 }
1041 return TargetLowering::getConstraintType(Constraint);
1042}
1043
1044TargetLowering::ConstraintWeight SystemZTargetLowering::
1045getSingleConstraintMatchWeight(AsmOperandInfo &info,
1046 const char *constraint) const {
1047 ConstraintWeight weight = CW_Invalid;
1048 Value *CallOperandVal = info.CallOperandVal;
1049 // If we don't have a value, we can't do a match,
1050 // but allow it at the lowest weight.
1051 if (!CallOperandVal)
1052 return CW_Default;
1053 Type *type = CallOperandVal->getType();
1054 // Look at the constraint type.
1055 switch (*constraint) {
1056 default:
1057 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
1058 break;
1059
1060 case 'a': // Address register
1061 case 'd': // Data register (equivalent to 'r')
1062 case 'h': // High-part register
1063 case 'r': // General-purpose register
1064 if (CallOperandVal->getType()->isIntegerTy())
1065 weight = CW_Register;
1066 break;
1067
1068 case 'f': // Floating-point register
1069 if (type->isFloatingPointTy())
1070 weight = CW_Register;
1071 break;
1072
1073 case 'v': // Vector register
1074 if ((type->isVectorTy() || type->isFloatingPointTy()) &&
1075 Subtarget.hasVector())
1076 weight = CW_Register;
1077 break;
1078
1079 case 'I': // Unsigned 8-bit constant
1080 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1081 if (isUInt<8>(C->getZExtValue()))
1082 weight = CW_Constant;
1083 break;
1084
1085 case 'J': // Unsigned 12-bit constant
1086 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1087 if (isUInt<12>(C->getZExtValue()))
1088 weight = CW_Constant;
1089 break;
1090
1091 case 'K': // Signed 16-bit constant
1092 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1093 if (isInt<16>(C->getSExtValue()))
1094 weight = CW_Constant;
1095 break;
1096
1097 case 'L': // Signed 20-bit displacement (on all targets we support)
1098 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1099 if (isInt<20>(C->getSExtValue()))
1100 weight = CW_Constant;
1101 break;
1102
1103 case 'M': // 0x7fffffff
1104 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1105 if (C->getZExtValue() == 0x7fffffff)
1106 weight = CW_Constant;
1107 break;
1108 }
1109 return weight;
1110}
1111
1112// Parse a "{tNNN}" register constraint for which the register type "t"
1113// has already been verified. MC is the class associated with "t" and
1114// Map maps 0-based register numbers to LLVM register numbers.
1115static std::pair<unsigned, const TargetRegisterClass *>
1116parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
1117 const unsigned *Map, unsigned Size) {
1118 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1119 if (isdigit(Constraint[2])) {
1120 unsigned Index;
1121 bool Failed =
1122 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1123 if (!Failed && Index < Size && Map[Index])
1124 return std::make_pair(Map[Index], RC);
1125 }
1126 return std::make_pair(0U, nullptr);
1127}
1128
1129std::pair<unsigned, const TargetRegisterClass *>
1130SystemZTargetLowering::getRegForInlineAsmConstraint(
1131 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1132 if (Constraint.size() == 1) {
1133 // GCC Constraint Letters
1134 switch (Constraint[0]) {
1135 default: break;
1136 case 'd': // Data register (equivalent to 'r')
1137 case 'r': // General-purpose register
1138 if (VT == MVT::i64)
1139 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1140 else if (VT == MVT::i128)
1141 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1142 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1143
1144 case 'a': // Address register
1145 if (VT == MVT::i64)
1146 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1147 else if (VT == MVT::i128)
1148 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1149 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1150
1151 case 'h': // High-part register (an LLVM extension)
1152 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1153
1154 case 'f': // Floating-point register
1155 if (!useSoftFloat()) {
1156 if (VT == MVT::f64)
1157 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1158 else if (VT == MVT::f128)
1159 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1160 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1161 }
1162 break;
1163 case 'v': // Vector register
1164 if (Subtarget.hasVector()) {
1165 if (VT == MVT::f32)
1166 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1167 if (VT == MVT::f64)
1168 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1169 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1170 }
1171 break;
1172 }
1173 }
1174 if (Constraint.size() > 0 && Constraint[0] == '{') {
1175 // We need to override the default register parsing for GPRs and FPRs
1176 // because the interpretation depends on VT. The internal names of
1177 // the registers are also different from the external names
1178 // (F0D and F0S instead of F0, etc.).
1179 if (Constraint[1] == 'r') {
1180 if (VT == MVT::i32)
1181 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1182 SystemZMC::GR32Regs, 16);
1183 if (VT == MVT::i128)
1184 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1185 SystemZMC::GR128Regs, 16);
1186 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1187 SystemZMC::GR64Regs, 16);
1188 }
1189 if (Constraint[1] == 'f') {
1190 if (useSoftFloat())
1191 return std::make_pair(
1192 0u, static_cast<const TargetRegisterClass *>(nullptr));
1193 if (VT == MVT::f32)
1194 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1195 SystemZMC::FP32Regs, 16);
1196 if (VT == MVT::f128)
1197 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1198 SystemZMC::FP128Regs, 16);
1199 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1200 SystemZMC::FP64Regs, 16);
1201 }
1202 if (Constraint[1] == 'v') {
1203 if (!Subtarget.hasVector())
1204 return std::make_pair(
1205 0u, static_cast<const TargetRegisterClass *>(nullptr));
1206 if (VT == MVT::f32)
1207 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1208 SystemZMC::VR32Regs, 32);
1209 if (VT == MVT::f64)
1210 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1211 SystemZMC::VR64Regs, 32);
1212 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1213 SystemZMC::VR128Regs, 32);
1214 }
1215 }
1216 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1217}
1218
1219// FIXME? Maybe this could be a TableGen attribute on some registers and
1220// this table could be generated automatically from RegInfo.
1221Register SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
1222 const MachineFunction &MF) const {
1223
1224 Register Reg = StringSwitch<Register>(RegName)
1225 .Case("r15", SystemZ::R15D)
1226 .Default(0);
1227 if (Reg)
1228 return Reg;
1229 report_fatal_error("Invalid register name global variable");
1230}
1231
1232void SystemZTargetLowering::
1233LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1234 std::vector<SDValue> &Ops,
1235 SelectionDAG &DAG) const {
1236 // Only support length 1 constraints for now.
1237 if (Constraint.length() == 1) {
1238 switch (Constraint[0]) {
1239 case 'I': // Unsigned 8-bit constant
1240 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1241 if (isUInt<8>(C->getZExtValue()))
1242 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1243 Op.getValueType()));
1244 return;
1245
1246 case 'J': // Unsigned 12-bit constant
1247 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1248 if (isUInt<12>(C->getZExtValue()))
1249 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1250 Op.getValueType()));
1251 return;
1252
1253 case 'K': // Signed 16-bit constant
1254 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1255 if (isInt<16>(C->getSExtValue()))
1256 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1257 Op.getValueType()));
1258 return;
1259
1260 case 'L': // Signed 20-bit displacement (on all targets we support)
1261 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1262 if (isInt<20>(C->getSExtValue()))
1263 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1264 Op.getValueType()));
1265 return;
1266
1267 case 'M': // 0x7fffffff
1268 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1269 if (C->getZExtValue() == 0x7fffffff)
1270 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1271 Op.getValueType()));
1272 return;
1273 }
1274 }
1275 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1276}
1277
1278//===----------------------------------------------------------------------===//
1279// Calling conventions
1280//===----------------------------------------------------------------------===//
1281
1282#include "SystemZGenCallingConv.inc"
1283
1284const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
1285 CallingConv::ID) const {
1286 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1287 SystemZ::R14D, 0 };
1288 return ScratchRegs;
1289}
1290
1291bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
1292 Type *ToType) const {
1293 return isTruncateFree(FromType, ToType);
1294}
1295
1296bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1297 return CI->isTailCall();
1298}
1299
1300// We do not yet support 128-bit single-element vector types. If the user
1301 // attempts to use such types as a function argument or return type, prefer
1302// to error out instead of emitting code violating the ABI.
1303static void VerifyVectorType(MVT VT, EVT ArgVT) {
1304 if (ArgVT.isVector() && !VT.isVector())
1305 report_fatal_error("Unsupported vector argument or return type");
1306}
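// Illustrative sketch (hypothetical IR): a 128-bit single-element vector
// parameter such as
//
//   define void @f(<1 x i128> %x)
//
// is scalarized during legalization, so ArgVT.isVector() holds while
// VT.isVector() does not, and the fatal error above fires instead of
// emitting ABI-violating code.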
1307
1308static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
1309 for (unsigned i = 0; i < Ins.size(); ++i)
1310 VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1311}
1312
1313static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
1314 for (unsigned i = 0; i < Outs.size(); ++i)
1315 VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1316}
1317
1318// Value is a value that has been passed to us in the location described by VA
1319// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1320// any loads onto Chain.
1321static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
1322 CCValAssign &VA, SDValue Chain,
1323 SDValue Value) {
1324 // If the argument has been promoted from a smaller type, insert an
1325 // assertion to capture this.
1326 if (VA.getLocInfo() == CCValAssign::SExt)
1327 Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1328 DAG.getValueType(VA.getValVT()));
1329 else if (VA.getLocInfo() == CCValAssign::ZExt)
1330 Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1331 DAG.getValueType(VA.getValVT()));
1332
1333 if (VA.isExtInLoc())
1334 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1335 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1336 // If this is a short vector argument loaded from the stack,
1337 // extend from i64 to full vector size and then bitcast.
1338 assert(VA.getLocVT() == MVT::i64);
1339 assert(VA.getValVT().isVector());
1340 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1341 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1342 } else
1343 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1344 return Value;
1345}
1346
1347// Value is a value of type VA.getValVT() that we need to copy into
1348// the location described by VA. Return a copy of Value converted to
1349 // VA.getLocVT(). The caller is responsible for handling indirect values.
1350static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
1351 CCValAssign &VA, SDValue Value) {
1352 switch (VA.getLocInfo()) {
1353 case CCValAssign::SExt:
1354 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1355 case CCValAssign::ZExt:
1356 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1357 case CCValAssign::AExt:
1358 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1359 case CCValAssign::BCvt: {
1360 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1361 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f64 ||
1362 VA.getValVT() == MVT::f128);
1363 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1364 ? MVT::v2i64
1365 : VA.getLocVT();
1366 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1367 // For ELF, this is a short vector argument to be stored to the stack,
1368 // bitcast to v2i64 and then extract first element.
1369 if (BitCastToType == MVT::v2i64)
1370 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1371 DAG.getConstant(0, DL, MVT::i32));
1372 return Value;
1373 }
1374 case CCValAssign::Full:
1375 return Value;
1376 default:
1377 llvm_unreachable("Unhandled getLocInfo()")::llvm::llvm_unreachable_internal("Unhandled getLocInfo()", "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 1377)
;
1378 }
1379}
1380
1381static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
1382 SDLoc DL(In);
1383 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
1384 DAG.getIntPtrConstant(0, DL));
1385 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
1386 DAG.getIntPtrConstant(1, DL));
1387 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1388 MVT::Untyped, Hi, Lo);
1389 return SDValue(Pair, 0);
1390}
1391
1392static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
1393 SDLoc DL(In);
1394 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1395 DL, MVT::i64, In);
1396 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1397 DL, MVT::i64, In);
1398 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1399}
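// Host-side sketch (assuming compiler __int128 support) of the hi/lo
// split these two helpers model at the DAG level:
//
//   unsigned __int128 Val = /* i128 value */ 0;
//   uint64_t Lo = (uint64_t)Val;          // EXTRACT_ELEMENT index 0
//   uint64_t Hi = (uint64_t)(Val >> 64);  // EXTRACT_ELEMENT index 1
//
// PAIR128 then combines Hi:Lo into an even/odd GR128 register pair, and
// the subreg_h64/subreg_l64 extractions above reverse the process.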
1400
1401bool SystemZTargetLowering::splitValueIntoRegisterParts(
1402 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1403 unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
1404 EVT ValueVT = Val.getValueType();
1405 assert((ValueVT != MVT::i128 ||
1406 ((NumParts == 1 && PartVT == MVT::Untyped) ||
1407 (NumParts == 2 && PartVT == MVT::i64))) &&
1408 "Unknown handling of i128 value.");
1409 if (ValueVT == MVT::i128 && NumParts == 1) {
1410 // Inline assembly operand.
1411 Parts[0] = lowerI128ToGR128(DAG, Val);
1412 return true;
1413 }
1414 return false;
1415}
1416
1417SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
1418 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1419 MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
1420 assert((ValueVT != MVT::i128 ||
1421 ((NumParts == 1 && PartVT == MVT::Untyped) ||
1422 (NumParts == 2 && PartVT == MVT::i64))) &&
1423 "Unknown handling of i128 value.");
1424 if (ValueVT == MVT::i128 && NumParts == 1)
1425 // Inline assembly operand.
1426 return lowerGR128ToI128(DAG, Parts[0]);
1427 return SDValue();
1428}
1429
1430SDValue SystemZTargetLowering::LowerFormalArguments(
1431 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1432 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1433 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1434 MachineFunction &MF = DAG.getMachineFunction();
1435 MachineFrameInfo &MFI = MF.getFrameInfo();
1436 MachineRegisterInfo &MRI = MF.getRegInfo();
1437 SystemZMachineFunctionInfo *FuncInfo =
1438 MF.getInfo<SystemZMachineFunctionInfo>();
1439 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1440 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1441
1442 // Detect unsupported vector argument types.
1443 if (Subtarget.hasVector())
1444 VerifyVectorTypes(Ins);
1445
1446 // Assign locations to all of the incoming arguments.
1447 SmallVector<CCValAssign, 16> ArgLocs;
1448 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1449 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1450
1451 unsigned NumFixedGPRs = 0;
1452 unsigned NumFixedFPRs = 0;
1453 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1454 SDValue ArgValue;
1455 CCValAssign &VA = ArgLocs[I];
1456 EVT LocVT = VA.getLocVT();
1457 if (VA.isRegLoc()) {
1458 // Arguments passed in registers
1459 const TargetRegisterClass *RC;
1460 switch (LocVT.getSimpleVT().SimpleTy) {
1461 default:
1462 // Integers smaller than i64 should be promoted to i64.
1463 llvm_unreachable("Unexpected argument type")::llvm::llvm_unreachable_internal("Unexpected argument type",
"llvm/lib/Target/SystemZ/SystemZISelLowering.cpp", 1463)
;
1464 case MVT::i32:
1465 NumFixedGPRs += 1;
1466 RC = &SystemZ::GR32BitRegClass;
1467 break;
1468 case MVT::i64:
1469 NumFixedGPRs += 1;
1470 RC = &SystemZ::GR64BitRegClass;
1471 break;
1472 case MVT::f32:
1473 NumFixedFPRs += 1;
1474 RC = &SystemZ::FP32BitRegClass;
1475 break;
1476 case MVT::f64:
1477 NumFixedFPRs += 1;
1478 RC = &SystemZ::FP64BitRegClass;
1479 break;
1480 case MVT::f128:
1481 NumFixedFPRs += 2;
1482 RC = &SystemZ::FP128BitRegClass;
1483 break;
1484 case MVT::v16i8:
1485 case MVT::v8i16:
1486 case MVT::v4i32:
1487 case MVT::v2i64:
1488 case MVT::v4f32:
1489 case MVT::v2f64:
1490 RC = &SystemZ::VR128BitRegClass;
1491 break;
1492 }
1493
1494 Register VReg = MRI.createVirtualRegister(RC);
1495 MRI.addLiveIn(VA.getLocReg(), VReg);
1496 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1497 } else {
1498 assert(VA.isMemLoc() && "Argument not register or memory")(static_cast <bool> (VA.isMemLoc() && "Argument not register or memory"
) ? void (0) : __assert_fail ("VA.isMemLoc() && \"Argument not register or memory\""
, "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp", 1498, __extension__
__PRETTY_FUNCTION__))
;
1499
1500 // Create the frame index object for this incoming parameter.
1501 // FIXME: Pre-include call frame size in the offset, should not
1502 // need to manually add it here.
1503 int64_t ArgSPOffset = VA.getLocMemOffset();
1504 if (Subtarget.isTargetXPLINK64()) {
1505 auto &XPRegs =
1506 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
1507 ArgSPOffset += XPRegs.getCallFrameSize();
1508 }
1509 int FI =
1510 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1511
1512 // Create the SelectionDAG nodes corresponding to a load
1513 // from this parameter. Unpromoted ints and floats are
1514 // passed as right-justified 8-byte values.
1515 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1516 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1517 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1518 DAG.getIntPtrConstant(4, DL));
1519 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1520 MachinePointerInfo::getFixedStack(MF, FI));
1521 }
1522
1523 // Convert the value of the argument register into the value that's
1524 // being passed.
1525 if (VA.getLocInfo() == CCValAssign::Indirect) {
1526 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1527 MachinePointerInfo()));
1528 // If the original argument was split (e.g. i128), we need
1529 // to load all parts of it here (using the same address).
1530 unsigned ArgIndex = Ins[I].OrigArgIndex;
1531 assert (Ins[I].PartOffset == 0);
1532 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1533 CCValAssign &PartVA = ArgLocs[I + 1];
1534 unsigned PartOffset = Ins[I + 1].PartOffset;
1535 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1536 DAG.getIntPtrConstant(PartOffset, DL));
1537 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1538 MachinePointerInfo()));
1539 ++I;
1540 }
1541 } else
1542 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1543 }
1544
1545 // FIXME: Add support for lowering varargs for XPLINK64 in a later patch.
1546 if (IsVarArg && Subtarget.isTargetELF()) {
1547 // Save the number of non-varargs registers for later use by va_start, etc.
1548 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1549 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1550
1551 // Likewise the address (in the form of a frame index) of where the
1552 // first stack vararg would be. The 1-byte size here is arbitrary.
1553 int64_t StackSize = CCInfo.getNextStackOffset();
1554 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
1555
1556 // ...and a similar frame index for the caller-allocated save area
1557 // that will be used to store the incoming registers.
1558 int64_t RegSaveOffset =
1559 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1560 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1561 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1562
1563 // Store the FPR varargs in the reserved frame slots. (We store the
1564 // GPRs as part of the prologue.)
1565 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1566 SDValue MemOps[SystemZ::ELFNumArgFPRs];
1567 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1568 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1569 int FI =
1570 MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
1571 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1572 Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
1573 &SystemZ::FP64BitRegClass);
1574 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1575 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1576 MachinePointerInfo::getFixedStack(MF, FI));
1577 }
1578 // Join the stores, which are independent of one another.
1579 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1580 makeArrayRef(&MemOps[NumFixedFPRs],
1581 SystemZ::ELFNumArgFPRs-NumFixedFPRs));
1582 }
1583 }
1584
1585 // FIXME: For XPLINK64, Add in support for handling incoming "ADA" special
1586 // register (R5)
1587 return Chain;
1588}
1589
1590static bool canUseSiblingCall(const CCState &ArgCCInfo,
1591 SmallVectorImpl<CCValAssign> &ArgLocs,
1592 SmallVectorImpl<ISD::OutputArg> &Outs) {
1593 // Punt if there are any indirect or stack arguments, or if the call
1594 // needs the callee-saved argument register R6, or if the call uses
1595 // the callee-saved register arguments SwiftSelf and SwiftError.
1596 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1597 CCValAssign &VA = ArgLocs[I];
1598 if (VA.getLocInfo() == CCValAssign::Indirect)
1599 return false;
1600 if (!VA.isRegLoc())
1601 return false;
1602 Register Reg = VA.getLocReg();
1603 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1604 return false;
1605 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1606 return false;
1607 }
1608 return true;
1609}
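// Illustrative sketch (hypothetical functions): since the ELF ABI passes
// the fifth integer argument in the callee-saved %r6, the first call
// below can be a sibling call while the second cannot:
//
//   long f(long a) { return g1(a); }              // argument in %r2
//   long h(long a) { return g5(a, a, a, a, a); }  // fifth argument in %r6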
1610
1611SDValue
1612SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
1613 SmallVectorImpl<SDValue> &InVals) const {
1614 SelectionDAG &DAG = CLI.DAG;
1615 SDLoc &DL = CLI.DL;
1616 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1617 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1618 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1619 SDValue Chain = CLI.Chain;
1620 SDValue Callee = CLI.Callee;
1621 bool &IsTailCall = CLI.IsTailCall;
1622 CallingConv::ID CallConv = CLI.CallConv;
1623 bool IsVarArg = CLI.IsVarArg;
1624 MachineFunction &MF = DAG.getMachineFunction();
1625 EVT PtrVT = getPointerTy(MF.getDataLayout());
1626 LLVMContext &Ctx = *DAG.getContext();
1627 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
1628
1629 // FIXME: z/OS support to be added later.
1630 if (Subtarget.isTargetXPLINK64())
1631 IsTailCall = false;
1632
1633 // Detect unsupported vector argument and return types.
1634 if (Subtarget.hasVector()) {
1635 VerifyVectorTypes(Outs);
1636 VerifyVectorTypes(Ins);
1637 }
1638
1639 // Analyze the operands of the call, assigning locations to each operand.
1640 SmallVector<CCValAssign, 16> ArgLocs;
1641 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1642 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1643
1644 // We don't support GuaranteedTailCallOpt, only automatically-detected
1645 // sibling calls.
1646 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1647 IsTailCall = false;
1648
1649 // Get a count of how many bytes are to be pushed on the stack.
1650 unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1651
1652 if (Subtarget.isTargetXPLINK64())
1653 // Although the XPLINK specifications for AMODE64 state that the minimum
1654 // size of the param area is 32 bytes and no rounding is otherwise
1655 // specified, we round this area up in 64-byte increments to be
1656 // compatible with existing compilers.
1657 NumBytes = std::max(64U, (unsigned)alignTo(NumBytes, 64));
1658
1659 // Mark the start of the call.
1660 if (!IsTailCall)
1661 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1662
1663 // Copy argument values to their designated locations.
1664 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
1665 SmallVector<SDValue, 8> MemOpChains;
1666 SDValue StackPtr;
1667 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1668 CCValAssign &VA = ArgLocs[I];
1669 SDValue ArgValue = OutVals[I];
1670
1671 if (VA.getLocInfo() == CCValAssign::Indirect) {
1672 // Store the argument in a stack slot and pass its address.
1673 unsigned ArgIndex = Outs[I].OrigArgIndex;
1674 EVT SlotVT;
1675 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1676 // Allocate the full stack space for a promoted (and split) argument.
1677 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1678 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1679 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1680 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1681 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1682 } else {
1683 SlotVT = Outs[I].ArgVT;
1684 }
1685 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1686 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1687 MemOpChains.push_back(
1688 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1689 MachinePointerInfo::getFixedStack(MF, FI)));
1690 // If the original argument was split (e.g. i128), we need
1691 // to store all parts of it here (and pass just one address).
1692 assert (Outs[I].PartOffset == 0);
1693 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1694 SDValue PartValue = OutVals[I + 1];
1695 unsigned PartOffset = Outs[I + 1].PartOffset;
1696 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1697 DAG.getIntPtrConstant(PartOffset, DL));
1698 MemOpChains.push_back(
1699 DAG.getStore(Chain, DL, PartValue, Address,
1700 MachinePointerInfo::getFixedStack(MF, FI)));
1701 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1702 SlotVT.getStoreSize()) && "Not enough space for argument part!");
1703 ++I;
1704 }
1705 ArgValue = SpillSlot;
1706 } else
1707 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1708
1709 if (VA.isRegLoc()) {
1710 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcast to an
1711 // MVT::i128 type. We decompose the 128-bit type into a pair of its high
1712 // and low values.
1713 if (VA.getLocVT() == MVT::i128)
1714 ArgValue = lowerI128ToGR128(DAG, ArgValue);
1715 // Queue up the argument copies and emit them at the end.
1716 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1717 } else {
1718 assert(VA.isMemLoc() && "Argument not register or memory");
1719
1720 // Work out the address of the stack slot. Unpromoted ints and
1721 // floats are passed as right-justified 8-byte values.
1722 if (!StackPtr.getNode())
1723 StackPtr = DAG.getCopyFromReg(Chain, DL,
1724 Regs->getStackPointerRegister(), PtrVT);
1725 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
1726 VA.getLocMemOffset();
1727 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1728 Offset += 4;
1729 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1730 DAG.getIntPtrConstant(Offset, DL));
1731
1732 // Emit the store.
1733 MemOpChains.push_back(
1734 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1735
1736 // Although long doubles or vectors are passed through the stack when
1737 // they are vararg (non-fixed arguments), if a long double or vector
1738 // occupies the third and fourth slot of the argument list GPR3 should
1739 // still shadow the third slot of the argument list.
1740 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
1741 SDValue ShadowArgValue =
1742 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
1743 DAG.getIntPtrConstant(1, DL));
1744 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
1745 }
1746 }
1747 }
1748
1749 // Join the stores, which are independent of one another.
1750 if (!MemOpChains.empty())
1751 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1752
1753 // Accept direct calls by converting symbolic call addresses to the
1754 // associated Target* opcodes. Force %r1 to be used for indirect
1755 // tail calls.
1756 SDValue Glue;
1757 // FIXME: Add support for XPLINK using the ADA register.
1758 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1759 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1760 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1761 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1762 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1763 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1764 } else if (IsTailCall) {
1765 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1766 Glue = Chain.getValue(1);
1767 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1768 }
1769
1770 // Build a sequence of copy-to-reg nodes, chained and glued together.
1771 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1772 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1773 RegsToPass[I].second, Glue);
1774 Glue = Chain.getValue(1);
1775 }
1776
1777 // The first call operand is the chain and the second is the target address.
1778 SmallVector<SDValue, 8> Ops;
1779 Ops.push_back(Chain);
1780 Ops.push_back(Callee);
1781
1782 // Add argument registers to the end of the list so that they are
1783 // known live into the call.
1784 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1785 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1786 RegsToPass[I].second.getValueType()));
1787
1788 // Add a register mask operand representing the call-preserved registers.
1789 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1790 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1791 assert(Mask && "Missing call preserved mask for calling convention")(static_cast <bool> (Mask && "Missing call preserved mask for calling convention"
) ? void (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp", 1791, __extension__
__PRETTY_FUNCTION__))
;
1792 Ops.push_back(DAG.getRegisterMask(Mask));
1793
1794 // Glue the call to the argument copies, if any.
1795 if (Glue.getNode())
1796 Ops.push_back(Glue);
1797
1798 // Emit the call.
1799 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1800 if (IsTailCall)
1801 return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
1802 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
1803 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
1804 Glue = Chain.getValue(1);
1805
1806 // Mark the end of the call, which is glued to the call itself.
1807 Chain = DAG.getCALLSEQ_END(Chain,
1808 DAG.getConstant(NumBytes, DL, PtrVT, true),
1809 DAG.getConstant(0, DL, PtrVT, true),
1810 Glue, DL);
1811 Glue = Chain.getValue(1);
1812
1813 // Assign locations to each value returned by this call.
1814 SmallVector<CCValAssign, 16> RetLocs;
1815 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
1816 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
1817
1818 // Copy all of the result registers out of their specified physreg.
1819 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1820 CCValAssign &VA = RetLocs[I];
1821
1822 // Copy the value out, gluing the copy to the end of the call sequence.
1823 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
1824 VA.getLocVT(), Glue);
1825 Chain = RetValue.getValue(1);
1826 Glue = RetValue.getValue(2);
1827
1828 // Convert the value of the return register into the value that's
1829 // being returned.
1830 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
1831 }
1832
1833 return Chain;
1834}
1835
1836bool SystemZTargetLowering::
1837CanLowerReturn(CallingConv::ID CallConv,
1838 MachineFunction &MF, bool isVarArg,
1839 const SmallVectorImpl<ISD::OutputArg> &Outs,
1840 LLVMContext &Context) const {
1841 // Detect unsupported vector return types.
1842 if (Subtarget.hasVector())
1843 VerifyVectorTypes(Outs);
1844
1845 // Special case that we cannot easily detect in RetCC_SystemZ since
1846 // i128 is not a legal type.
1847 for (auto &Out : Outs)
1848 if (Out.ArgVT == MVT::i128)
1849 return false;
1850
1851 SmallVector<CCValAssign, 16> RetLocs;
1852 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1853 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1854}
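// Illustrative consequence (hypothetical signature): because i128 return
// values are rejected here, a declaration such as
//
//   __int128 f(void);
//
// is lowered with a hidden return slot in memory rather than an
// in-register i128 return.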
1855
1856SDValue
1857SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1858 bool IsVarArg,
1859 const SmallVectorImpl<ISD::OutputArg> &Outs,
1860 const SmallVectorImpl<SDValue> &OutVals,
1861 const SDLoc &DL, SelectionDAG &DAG) const {
1862 MachineFunction &MF = DAG.getMachineFunction();
1863
1864 // Detect unsupported vector return types.
1865 if (Subtarget.hasVector())
1866 VerifyVectorTypes(Outs);
1867
1868 // Assign locations to each returned value.
1869 SmallVector<CCValAssign, 16> RetLocs;
1870 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1871 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1872
1873 // Quick exit for void returns
1874 if (RetLocs.empty())
1875 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1876
1877 if (CallConv == CallingConv::GHC)
1878 report_fatal_error("GHC functions return void only");
1879
1880 // Copy the result values into the output registers.
1881 SDValue Glue;
1882 SmallVector<SDValue, 4> RetOps;
1883 RetOps.push_back(Chain);
1884 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1885 CCValAssign &VA = RetLocs[I];
1886 SDValue RetValue = OutVals[I];
1887
1888 // Make the return register live on exit.
1889 assert(VA.isRegLoc() && "Can only return in registers!")(static_cast <bool> (VA.isRegLoc() && "Can only return in registers!"
) ? void (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp", 1889, __extension__
__PRETTY_FUNCTION__))
;
1890
1891 // Promote the value as required.
1892 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1893
1894 // Chain and glue the copies together.
1895 Register Reg = VA.getLocReg();
1896 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1897 Glue = Chain.getValue(1);
1898 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1899 }
1900
1901 // Update chain and glue.
1902 RetOps[0] = Chain;
1903 if (Glue.getNode())
1904 RetOps.push_back(Glue);
1905
1906 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
1907}
1908
1909// Return true if Op is an intrinsic node with chain that returns the CC value
1910// as its only (other) argument. Provide the associated SystemZISD opcode and
1911// the mask of valid CC values if so.
1912static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1913 unsigned &CCValid) {
1914 unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1915 switch (Id) {
1916 case Intrinsic::s390_tbegin:
1917 Opcode = SystemZISD::TBEGIN;
1918 CCValid = SystemZ::CCMASK_TBEGIN;
1919 return true;
1920
1921 case Intrinsic::s390_tbegin_nofloat:
1922 Opcode = SystemZISD::TBEGIN_NOFLOAT;
1923 CCValid = SystemZ::CCMASK_TBEGIN;
1924 return true;
1925
1926 case Intrinsic::s390_tend:
1927 Opcode = SystemZISD::TEND;
1928 CCValid = SystemZ::CCMASK_TEND;
1929 return true;
1930
1931 default:
1932 return false;
1933 }
1934}
1935
1936// Return true if Op is an intrinsic node without chain that returns the
1937// CC value as its final argument. Provide the associated SystemZISD
1938// opcode and the mask of valid CC values if so.
1939static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
1940 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1941 switch (Id) {
1942 case Intrinsic::s390_vpkshs:
1943 case Intrinsic::s390_vpksfs:
1944 case Intrinsic::s390_vpksgs:
1945 Opcode = SystemZISD::PACKS_CC;
1946 CCValid = SystemZ::CCMASK_VCMP;
1947 return true;
1948
1949 case Intrinsic::s390_vpklshs:
1950 case Intrinsic::s390_vpklsfs:
1951 case Intrinsic::s390_vpklsgs:
1952 Opcode = SystemZISD::PACKLS_CC;
1953 CCValid = SystemZ::CCMASK_VCMP;
1954 return true;
1955
1956 case Intrinsic::s390_vceqbs:
1957 case Intrinsic::s390_vceqhs:
1958 case Intrinsic::s390_vceqfs:
1959 case Intrinsic::s390_vceqgs:
1960 Opcode = SystemZISD::VICMPES;
1961 CCValid = SystemZ::CCMASK_VCMP;
1962 return true;
1963
1964 case Intrinsic::s390_vchbs:
1965 case Intrinsic::s390_vchhs:
1966 case Intrinsic::s390_vchfs:
1967 case Intrinsic::s390_vchgs:
1968 Opcode = SystemZISD::VICMPHS;
1969 CCValid = SystemZ::CCMASK_VCMP;
1970 return true;
1971
1972 case Intrinsic::s390_vchlbs:
1973 case Intrinsic::s390_vchlhs:
1974 case Intrinsic::s390_vchlfs:
1975 case Intrinsic::s390_vchlgs:
1976 Opcode = SystemZISD::VICMPHLS;
1977 CCValid = SystemZ::CCMASK_VCMP;
1978 return true;
1979
1980 case Intrinsic::s390_vtm:
1981 Opcode = SystemZISD::VTM;
1982 CCValid = SystemZ::CCMASK_VCMP;
1983 return true;
1984
1985 case Intrinsic::s390_vfaebs:
1986 case Intrinsic::s390_vfaehs:
1987 case Intrinsic::s390_vfaefs:
1988 Opcode = SystemZISD::VFAE_CC;
1989 CCValid = SystemZ::CCMASK_ANY;
1990 return true;
1991
1992 case Intrinsic::s390_vfaezbs:
1993 case Intrinsic::s390_vfaezhs:
1994 case Intrinsic::s390_vfaezfs:
1995 Opcode = SystemZISD::VFAEZ_CC;
1996 CCValid = SystemZ::CCMASK_ANY;
1997 return true;
1998
1999 case Intrinsic::s390_vfeebs:
2000 case Intrinsic::s390_vfeehs:
2001 case Intrinsic::s390_vfeefs:
2002 Opcode = SystemZISD::VFEE_CC;
2003 CCValid = SystemZ::CCMASK_ANY;
2004 return true;
2005
2006 case Intrinsic::s390_vfeezbs:
2007 case Intrinsic::s390_vfeezhs:
2008 case Intrinsic::s390_vfeezfs:
2009 Opcode = SystemZISD::VFEEZ_CC;
2010 CCValid = SystemZ::CCMASK_ANY;
2011 return true;
2012
2013 case Intrinsic::s390_vfenebs:
2014 case Intrinsic::s390_vfenehs:
2015 case Intrinsic::s390_vfenefs:
2016 Opcode = SystemZISD::VFENE_CC;
2017 CCValid = SystemZ::CCMASK_ANY;
2018 return true;
2019
2020 case Intrinsic::s390_vfenezbs:
2021 case Intrinsic::s390_vfenezhs:
2022 case Intrinsic::s390_vfenezfs:
2023 Opcode = SystemZISD::VFENEZ_CC;
2024 CCValid = SystemZ::CCMASK_ANY;
2025 return true;
2026
2027 case Intrinsic::s390_vistrbs:
2028 case Intrinsic::s390_vistrhs:
2029 case Intrinsic::s390_vistrfs:
2030 Opcode = SystemZISD::VISTR_CC;
2031 CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
2032 return true;
2033
2034 case Intrinsic::s390_vstrcbs:
2035 case Intrinsic::s390_vstrchs:
2036 case Intrinsic::s390_vstrcfs:
2037 Opcode = SystemZISD::VSTRC_CC;
2038 CCValid = SystemZ::CCMASK_ANY;
2039 return true;
2040
2041 case Intrinsic::s390_vstrczbs:
2042 case Intrinsic::s390_vstrczhs:
2043 case Intrinsic::s390_vstrczfs:
2044 Opcode = SystemZISD::VSTRCZ_CC;
2045 CCValid = SystemZ::CCMASK_ANY;
2046 return true;
2047
2048 case Intrinsic::s390_vstrsb:
2049 case Intrinsic::s390_vstrsh:
2050 case Intrinsic::s390_vstrsf:
2051 Opcode = SystemZISD::VSTRS_CC;
2052 CCValid = SystemZ::CCMASK_ANY;
2053 return true;
2054
2055 case Intrinsic::s390_vstrszb:
2056 case Intrinsic::s390_vstrszh:
2057 case Intrinsic::s390_vstrszf:
2058 Opcode = SystemZISD::VSTRSZ_CC;
2059 CCValid = SystemZ::CCMASK_ANY;
2060 return true;
2061
2062 case Intrinsic::s390_vfcedbs:
2063 case Intrinsic::s390_vfcesbs:
2064 Opcode = SystemZISD::VFCMPES;
2065 CCValid = SystemZ::CCMASK_VCMP;
2066 return true;
2067
2068 case Intrinsic::s390_vfchdbs:
2069 case Intrinsic::s390_vfchsbs:
2070 Opcode = SystemZISD::VFCMPHS;
2071 CCValid = SystemZ::CCMASK_VCMP;
2072 return true;
2073
2074 case Intrinsic::s390_vfchedbs:
2075 case Intrinsic::s390_vfchesbs:
2076 Opcode = SystemZISD::VFCMPHES;
2077 CCValid = SystemZ::CCMASK_VCMP;
2078 return true;
2079
2080 case Intrinsic::s390_vftcidb:
2081 case Intrinsic::s390_vftcisb:
2082 Opcode = SystemZISD::VFTCI;
2083 CCValid = SystemZ::CCMASK_VCMP;
2084 return true;
2085
2086 case Intrinsic::s390_tdc:
2087 Opcode = SystemZISD::TDC;
2088 CCValid = SystemZ::CCMASK_TDC;
2089 return true;
2090
2091 default:
2092 return false;
2093 }
2094}
2095
2096// Emit an intrinsic with chain and an explicit CC register result.
2097static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2098 unsigned Opcode) {
2099 // Copy all operands except the intrinsic ID.
2100 unsigned NumOps = Op.getNumOperands();
2101 SmallVector<SDValue, 6> Ops;
2102 Ops.reserve(NumOps - 1);
2103 Ops.push_back(Op.getOperand(0));
2104 for (unsigned I = 2; I < NumOps; ++I)
2105 Ops.push_back(Op.getOperand(I));
2106
2107 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2108 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2109 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2110 SDValue OldChain = SDValue(Op.getNode(), 1);
2111 SDValue NewChain = SDValue(Intr.getNode(), 1);
2112 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2113 return Intr.getNode();
2114}
2115
2116// Emit an intrinsic with an explicit CC register result.
2117static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2118 unsigned Opcode) {
2119 // Copy all operands except the intrinsic ID.
2120 unsigned NumOps = Op.getNumOperands();
2121 SmallVector<SDValue, 6> Ops;
2122 Ops.reserve(NumOps - 1);
2123 for (unsigned I = 1; I < NumOps; ++I)
2124 Ops.push_back(Op.getOperand(I));
2125
2126 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2127 return Intr.getNode();
2128}
2129
2130// CC is a comparison that will be implemented using an integer or
2131// floating-point comparison. Return the condition code mask for
2132// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2133// unsigned comparisons and clear for signed ones. In the floating-point
2134// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2135static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2136#define CONV(X) \
2137 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2138 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2139 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2140
2141 switch (CC) {
2142 default:
2143 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp", 2143)
;
2144
2145 CONV(EQ);
2146 CONV(NE);
2147 CONV(GT);
2148 CONV(GE);
2149 CONV(LT);
2150 CONV(LE);
2151
2152 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2153 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2154 }
2155#undef CONV
2156}
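// For reference, CONV(EQ) above expands to:
//
//   case ISD::SETEQ:  return SystemZ::CCMASK_CMP_EQ;
//   case ISD::SETOEQ: return SystemZ::CCMASK_CMP_EQ;
//   case ISD::SETUEQ: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_EQ;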
2157
2158// If C can be converted to a comparison against zero, adjust the operands
2159// as necessary.
2160static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2161 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2162 return;
2163
2164 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2165 if (!ConstOp1)
2166 return;
2167
2168 int64_t Value = ConstOp1->getSExtValue();
2169 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2170 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2171 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2172 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2173 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2174 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2175 }
2176}
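// Worked example: for (x > -1), Value == -1 and C.CCMask == CCMASK_CMP_GT;
// XOR-ing in CCMASK_CMP_EQ turns GT into GE, so the comparison becomes the
// equivalent zero-based test (x >= 0).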
2177
2178// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2179// adjust the operands as necessary.
2180static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2181 Comparison &C) {
2182 // For us to make any changes, it must be a comparison between a single-use
2183 // load and a constant.
2184 if (!C.Op0.hasOneUse() ||
2185 C.Op0.getOpcode() != ISD::LOAD ||
2186 C.Op1.getOpcode() != ISD::Constant)
2187 return;
2188
2189 // We must have an 8- or 16-bit load.
2190 auto *Load = cast<LoadSDNode>(C.Op0);
2191 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2192 if ((NumBits != 8 && NumBits != 16) ||
2193 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2194 return;
2195
2196 // The load must be an extending one and the constant must be within the
2197 // range of the unextended value.
2198 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2199 uint64_t Value = ConstOp1->getZExtValue();
2200 uint64_t Mask = (1 << NumBits) - 1;
2201 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2202 // Make sure that ConstOp1 is in range of C.Op0.
2203 int64_t SignedValue = ConstOp1->getSExtValue();
2204 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2205 return;
2206 if (C.ICmpType != SystemZICMP::SignedOnly) {
2207 // Unsigned comparison between two sign-extended values is equivalent
2208 // to unsigned comparison between two zero-extended values.
2209 Value &= Mask;
2210 } else if (NumBits == 8) {
2211 // Try to treat the comparison as unsigned, so that we can use CLI.
2212 // Adjust CCMask and Value as necessary.
2213 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2214 // Test whether the high bit of the byte is set.
2215 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2216 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2217 // Test whether the high bit of the byte is clear.
2218 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2219 else
2220 // No instruction exists for this combination.
2221 return;
2222 C.ICmpType = SystemZICMP::UnsignedOnly;
2223 }
2224 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2225 if (Value > Mask)
2226 return;
2227 // If the constant is in range, we can use any comparison.
2228 C.ICmpType = SystemZICMP::Any;
2229 } else
2230 return;
2231
2232 // Make sure that the first operand is an i32 of the right extension type.
2233 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2234 ISD::SEXTLOAD :
2235 ISD::ZEXTLOAD);
2236 if (C.Op0.getValueType() != MVT::i32 ||
2237 Load->getExtensionType() != ExtType) {
2238 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2239 Load->getBasePtr(), Load->getPointerInfo(),
2240 Load->getMemoryVT(), Load->getAlignment(),
2241 Load->getMemOperand()->getFlags());
2242 // Update the chain uses.
2243 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2244 }
2245
2246 // Make sure that the second operand is an i32 with the right value.
2247 if (C.Op1.getValueType() != MVT::i32 ||
2248 Value != ConstOp1->getZExtValue())
2249 C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2250}
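// Worked example for the CLI rewrite above: with a sign-extending i8 load,
// (b < 0) has Value == 0 and CCMASK_CMP_LT; testing the sign bit instead
// gives the equivalent unsigned comparison (b > 127), which CLI encodes.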
2251
2252// Return true if Op is either an unextended load, or a load suitable
2253// for integer register-memory comparisons of type ICmpType.
2254static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2255 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2256 if (Load) {
2257 // There are no instructions to compare a register with a memory byte.
2258 if (Load->getMemoryVT() == MVT::i8)
2259 return false;
2260 // Otherwise decide on extension type.
2261 switch (Load->getExtensionType()) {
2262 case ISD::NON_EXTLOAD:
2263 return true;
2264 case ISD::SEXTLOAD:
2265 return ICmpType != SystemZICMP::UnsignedOnly;
2266 case ISD::ZEXTLOAD:
2267 return ICmpType != SystemZICMP::SignedOnly;
2268 default:
2269 break;
2270 }
2271 }
2272 return false;
2273}
2274
2275// Return true if it is better to swap the operands of C.
2276static bool shouldSwapCmpOperands(const Comparison &C) {
2277 // Leave f128 comparisons alone, since they have no memory forms.
2278 if (C.Op0.getValueType() == MVT::f128)
2279 return false;
2280
2281 // Always keep a floating-point constant second, since comparisons with
2282 // zero can use LOAD TEST and comparisons with other constants make a
2283 // natural memory operand.
2284 if (isa<ConstantFPSDNode>(C.Op1))
2285 return false;
2286
2287 // Never swap comparisons with zero since there are many ways to optimize
2288 // those later.
2289 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2290 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2291 return false;
2292
2293 // Also keep natural memory operands second if the loaded value is
2294 // only used here. Several comparisons have memory forms.
2295 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2296 return false;
2297
2298 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2299 // In that case we generally prefer the memory to be second.
2300 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2301 // The only exceptions are when the second operand is a constant and
2302 // we can use things like CHHSI.
2303 if (!ConstOp1)
2304 return true;
2305 // The unsigned memory-immediate instructions can handle 16-bit
2306 // unsigned integers.
2307 if (C.ICmpType != SystemZICMP::SignedOnly &&
2308 isUInt<16>(ConstOp1->getZExtValue()))
2309 return false;
2310 // The signed memory-immediate instructions can handle 16-bit
2311 // signed integers.
2312 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2313 isInt<16>(ConstOp1->getSExtValue()))
2314 return false;
2315 return true;
2316 }
2317
2318 // Try to promote the use of CGFR and CLGFR.
2319 unsigned Opcode0 = C.Op0.getOpcode();
2320 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2321 return true;
2322 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2323 return true;
2324 if (C.ICmpType != SystemZICMP::SignedOnly &&
2325 Opcode0 == ISD::AND &&
2326 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2327 cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
2328 return true;
2329
2330 return false;
2331}
2332
2333// Check whether C tests for equality between X and Y and whether X - Y
2334// or Y - X is also computed. In that case it's better to compare the
2335// result of the subtraction against zero.
2336static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2337 Comparison &C) {
2338 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2339 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2340 for (SDNode *N : C.Op0->uses()) {
2341 if (N->getOpcode() == ISD::SUB &&
2342 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2343 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2344 C.Op0 = SDValue(N, 0);
2345 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2346 return;
2347 }
2348 }
2349 }
2350}
2351
2352// Check whether C compares a floating-point value with zero and if that
2353// floating-point value is also negated. In this case we can use the
2354// negation to set CC, so avoiding separate LOAD AND TEST and
2355// LOAD (NEGATIVE/COMPLEMENT) instructions.
2356static void adjustForFNeg(Comparison &C) {
2357 // This optimization is invalid for strict comparisons, since FNEG
2358 // does not raise any exceptions.
2359 if (C.Chain)
2360 return;
2361 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2362 if (C1 && C1->isZero()) {
2363 for (SDNode *N : C.Op0->uses()) {
2364 if (N->getOpcode() == ISD::FNEG) {
2365 C.Op0 = SDValue(N, 0);
2366 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2367 return;
2368 }
2369 }
2370 }
2371}
2372
2373// Check whether C compares (shl X, 32) with 0 and whether X is
2374// also sign-extended. In that case it is better to test the result
2375// of the sign extension using LTGFR.
2376//
2377// This case is important because InstCombine transforms a comparison
2378// with (sext (trunc X)) into a comparison with (shl X, 32).
2379static void adjustForLTGFR(Comparison &C) {
2380 // Check for a comparison between (shl X, 32) and 0.
2381 if (C.Op0.getOpcode() == ISD::SHL &&
2382 C.Op0.getValueType() == MVT::i64 &&
2383 C.Op1.getOpcode() == ISD::Constant &&
2384 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2385 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2386 if (C1 && C1->getZExtValue() == 32) {
2387 SDValue ShlOp0 = C.Op0.getOperand(0);
2388 // See whether X has any SIGN_EXTEND_INREG uses.
2389 for (SDNode *N : ShlOp0->uses()) {
2390 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2391 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2392 C.Op0 = SDValue(N, 0);
2393 return;
2394 }
2395 }
2396 }
2397 }
2398}
2399
2400// If C compares the truncation of an extending load, try to compare
2401// the untruncated value instead. This exposes more opportunities to
2402// reuse CC.
2403static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2404 Comparison &C) {
2405 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2406 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2407 C.Op1.getOpcode() == ISD::Constant &&
2408 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2409 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2410 if (L->getMemoryVT().getStoreSizeInBits().getFixedSize() <=
2411 C.Op0.getValueSizeInBits().getFixedSize()) {
2412 unsigned Type = L->getExtensionType();
2413 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2414 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2415 C.Op0 = C.Op0.getOperand(0);
2416 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2417 }
2418 }
2419 }
2420}
2421
2422// Return true if shift operation N has an in-range constant shift value.
2423// Store it in ShiftVal if so.
2424static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2425 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2426 if (!Shift)
2427 return false;
2428
2429 uint64_t Amount = Shift->getZExtValue();
2430 if (Amount >= N.getValueSizeInBits())
2431 return false;
2432
2433 ShiftVal = Amount;
2434 return true;
2435}
2436
2437// Check whether an AND with Mask is suitable for a TEST UNDER MASK
2438// instruction and whether the CC value is descriptive enough to handle
2439// a comparison of type Opcode between the AND result and CmpVal.
2440// CCMask says which comparison result is being tested and BitSize is
2441// the number of bits in the operands. If TEST UNDER MASK can be used,
2442// return the corresponding CC mask, otherwise return 0.
2443static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2444 uint64_t Mask, uint64_t CmpVal,
2445 unsigned ICmpType) {
2446 assert(Mask != 0 && "ANDs with zero should have been removed by now")(static_cast <bool> (Mask != 0 && "ANDs with zero should have been removed by now"
) ? void (0) : __assert_fail ("Mask != 0 && \"ANDs with zero should have been removed by now\""
, "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp", 2446, __extension__
__PRETTY_FUNCTION__))
;
2447
2448 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2449 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2450 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2451 return 0;
2452
2453 // Work out the masks for the lowest and highest bits.
2454 unsigned HighShift = 63 - countLeadingZeros(Mask);
2455 uint64_t High = uint64_t(1) << HighShift;
2456 uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2457
2458 // Signed ordered comparisons are effectively unsigned if the sign
2459 // bit is dropped.
2460 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2461
2462 // Check for equality comparisons with 0, or the equivalent.
2463 if (CmpVal == 0) {
2464 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2465 return SystemZ::CCMASK_TM_ALL_0;
2466 if (CCMask == SystemZ::CCMASK_CMP_NE)
2467 return SystemZ::CCMASK_TM_SOME_1;
2468 }
2469 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2470 if (CCMask == SystemZ::CCMASK_CMP_LT)
2471 return SystemZ::CCMASK_TM_ALL_0;
2472 if (CCMask == SystemZ::CCMASK_CMP_GE)
2473 return SystemZ::CCMASK_TM_SOME_1;
2474 }
2475 if (EffectivelyUnsigned && CmpVal < Low) {
2476 if (CCMask == SystemZ::CCMASK_CMP_LE)
2477 return SystemZ::CCMASK_TM_ALL_0;
2478 if (CCMask == SystemZ::CCMASK_CMP_GT)
2479 return SystemZ::CCMASK_TM_SOME_1;
2480 }
2481
2482 // Check for equality comparisons with the mask, or the equivalent.
2483 if (CmpVal == Mask) {
2484 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2485 return SystemZ::CCMASK_TM_ALL_1;
2486 if (CCMask == SystemZ::CCMASK_CMP_NE)
2487 return SystemZ::CCMASK_TM_SOME_0;
2488 }
2489 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2490 if (CCMask == SystemZ::CCMASK_CMP_GT)
2491 return SystemZ::CCMASK_TM_ALL_1;
2492 if (CCMask == SystemZ::CCMASK_CMP_LE)
2493 return SystemZ::CCMASK_TM_SOME_0;
2494 }
2495 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2496 if (CCMask == SystemZ::CCMASK_CMP_GE)
2497 return SystemZ::CCMASK_TM_ALL_1;
2498 if (CCMask == SystemZ::CCMASK_CMP_LT)
2499 return SystemZ::CCMASK_TM_SOME_0;
2500 }
2501
2502 // Check for ordered comparisons with the top bit.
2503 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2504 if (CCMask == SystemZ::CCMASK_CMP_LE)
2505 return SystemZ::CCMASK_TM_MSB_0;
2506 if (CCMask == SystemZ::CCMASK_CMP_GT)
2507 return SystemZ::CCMASK_TM_MSB_1;
2508 }
2509 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2510 if (CCMask == SystemZ::CCMASK_CMP_LT)
2511 return SystemZ::CCMASK_TM_MSB_0;
2512 if (CCMask == SystemZ::CCMASK_CMP_GE)
2513 return SystemZ::CCMASK_TM_MSB_1;
2514 }
2515
2516 // If there are just two bits, we can do equality checks for Low and High
2517 // as well.
2518 if (Mask == Low + High) {
2519 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2520 return SystemZ::CCMASK_TM_MIXED_MSB_0;
2521 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2522 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2523 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2524 return SystemZ::CCMASK_TM_MIXED_MSB_1;
2525 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2526 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2527 }
2528
2529 // Looks like we've exhausted our options.
2530 return 0;
2531}
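// Worked example: for (x & 0xff00) != 0, Mask == 0xff00 passes the
// isImmLL() check, CmpVal == 0 and CCMask == CCMASK_CMP_NE, so the
// function returns CCMASK_TM_SOME_1 and the test can be emitted as TMLL
// on the 0xff00 bits.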
2532
2533// See whether C can be implemented as a TEST UNDER MASK instruction.
2534// Update the arguments with the TM version if so.
2535static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2536 Comparison &C) {
2537 // Check that we have a comparison with a constant.
2538 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2539 if (!ConstOp1)
2540 return;
2541 uint64_t CmpVal = ConstOp1->getZExtValue();
2542
2543 // Check whether the nonconstant input is an AND with a constant mask.
2544 Comparison NewC(C);
2545 uint64_t MaskVal;
2546 ConstantSDNode *Mask = nullptr;
2547 if (C.Op0.getOpcode() == ISD::AND) {
2548 NewC.Op0 = C.Op0.getOperand(0);
2549 NewC.Op1 = C.Op0.getOperand(1);
2550 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2551 if (!Mask)
2552 return;
2553 MaskVal = Mask->getZExtValue();
2554 } else {
2555 // There is no instruction to compare with a 64-bit immediate
2556 // so use TMHH instead if possible. We need an unsigned ordered
2557 // comparison with an i64 immediate.
2558 if (NewC.Op0.getValueType() != MVT::i64 ||
2559 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2560 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2561 NewC.ICmpType == SystemZICMP::SignedOnly)
2562 return;
2563 // Convert LE and GT comparisons into LT and GE.
2564 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2565 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2566 if (CmpVal == uint64_t(-1))
2567 return;
2568 CmpVal += 1;
2569 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2570 }
2571 // If the low N bits of Op1 are zero, then the low N bits of Op0 can
2572 // be masked off without changing the result.
2573 MaskVal = -(CmpVal & -CmpVal);
2574 NewC.ICmpType = SystemZICMP::UnsignedOnly;
2575 }
2576 if (!MaskVal)
2577 return;
2578
2579 // Check whether the combination of mask, comparison value and comparison
2580 // type are suitable.
2581 unsigned BitSize = NewC.Op0.getValueSizeInBits();
2582 unsigned NewCCMask, ShiftVal;
2583 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2584 NewC.Op0.getOpcode() == ISD::SHL &&
2585 isSimpleShift(NewC.Op0, ShiftVal) &&
2586 (MaskVal >> ShiftVal != 0) &&
2587 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2588 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2589 MaskVal >> ShiftVal,
2590 CmpVal >> ShiftVal,
2591 SystemZICMP::Any))) {
2592 NewC.Op0 = NewC.Op0.getOperand(0);
2593 MaskVal >>= ShiftVal;
2594 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2595 NewC.Op0.getOpcode() == ISD::SRL &&
2596 isSimpleShift(NewC.Op0, ShiftVal) &&
2597 (MaskVal << ShiftVal != 0) &&
2598 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2599 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2600 MaskVal << ShiftVal,
2601 CmpVal << ShiftVal,
2602 SystemZICMP::UnsignedOnly))) {
2603 NewC.Op0 = NewC.Op0.getOperand(0);
2604 MaskVal <<= ShiftVal;
2605 } else {
2606 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2607 NewC.ICmpType);
2608 if (!NewCCMask)
2609 return;
2610 }
2611
2612 // Go ahead and make the change.
2613 C.Opcode = SystemZISD::TM;
2614 C.Op0 = NewC.Op0;
2615 if (Mask && Mask->getZExtValue() == MaskVal)
2616 C.Op1 = SDValue(Mask, 0);
2617 else
2618 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2619 C.CCValid = SystemZ::CCMASK_TM;
2620 C.CCMask = NewCCMask;
2621}
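
// Illustrative sketch (not part of the original source): why the
// MaskVal = -(CmpVal & -CmpVal) trick above is sound. The expression
// clears exactly the bits below CmpVal's lowest set bit, and those bits
// of Op0 cannot affect an unsigned comparison against CmpVal.
#include <cassert>
#include <cstdint>

static void lowBitMaskSketch() {
  uint64_t CmpVal = 0x40;                 // low six bits are zero
  uint64_t MaskVal = -(CmpVal & -CmpVal); // 0xffffffffffffffc0
  for (uint64_t Op0 = 0; Op0 < 0x200; ++Op0)
    assert((Op0 < CmpVal) == ((Op0 & MaskVal) < CmpVal));
}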
2622
2623// See whether the comparison argument contains a redundant AND
2624// and remove it if so. This sometimes happens due to the generic
2625// BRCOND expansion.
2626static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2627 Comparison &C) {
2628 if (C.Op0.getOpcode() != ISD::AND)
2629 return;
2630 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2631 if (!Mask)
2632 return;
2633 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
2634 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2635 return;
2636
2637 C.Op0 = C.Op0.getOperand(0);
2638}
2639
2640// Return a Comparison that tests the condition-code result of intrinsic
2641// node Call against constant integer CC using comparison code Cond.
2642// Opcode is the opcode of the SystemZISD operation for the intrinsic
2643// and CCValid is the set of possible condition-code results.
2644static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2645 SDValue Call, unsigned CCValid, uint64_t CC,
2646 ISD::CondCode Cond) {
2647 Comparison C(Call, SDValue(), SDValue());
2648 C.Opcode = Opcode;
2649 C.CCValid = CCValid;
2650 if (Cond == ISD::SETEQ)
2651 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2652 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2653 else if (Cond == ISD::SETNE)
2654 // ...and the inverse of that.
2655 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2656 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2657 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2658 // always true for CC>3.
2659 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2660 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2661 // ...and the inverse of that.
2662 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2663 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2664 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2665 // always true for CC>3.
2666 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2667 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2668 // ...and the inverse of that.
2669 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2670 else
2671 llvm_unreachable("Unexpected integer comparison type")::llvm::llvm_unreachable_internal("Unexpected integer comparison type"
, "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp", 2671)
;
2672 C.CCMask &= CCValid;
2673 return C;
2674}
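
// Illustrative sketch (not part of the original source): the CC-to-mask
// encoding the cases above rely on. Condition code c in [0, 3] owns bit
// (3 - c) of a 4-bit mask, so "CC < c" is the set of bits strictly above
// bit (3 - c), which is what ~0U << (4 - c) produces.
#include <cassert>

static void ccMaskSketch() {
  auto BitFor = [](unsigned CC) { return 1u << (3 - CC); };
  for (unsigned C = 0; C < 4; ++C) {
    unsigned LTMask = (~0u << (4 - C)) & 0xf; // mask for "CC < C"
    for (unsigned CC = 0; CC < 4; ++CC)
      assert(bool(LTMask & BitFor(CC)) == (CC < C));
  }
}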
2675
2676 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2677static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2678 ISD::CondCode Cond, const SDLoc &DL,
2679 SDValue Chain = SDValue(),
2680 bool IsSignaling = false) {
2681 if (CmpOp1.getOpcode() == ISD::Constant) {
2682 assert(!Chain);
2683 uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2684 unsigned Opcode, CCValid;
2685 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2686 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2687 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2688 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2689 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2690 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2691 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2692 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2693 }
2694 Comparison C(CmpOp0, CmpOp1, Chain);
2695 C.CCMask = CCMaskForCondCode(Cond);
2696 if (C.Op0.getValueType().isFloatingPoint()) {
2697 C.CCValid = SystemZ::CCMASK_FCMP;
2698 if (!C.Chain)
2699 C.Opcode = SystemZISD::FCMP;
2700 else if (!IsSignaling)
2701 C.Opcode = SystemZISD::STRICT_FCMP;
2702 else
2703 C.Opcode = SystemZISD::STRICT_FCMPS;
2704 adjustForFNeg(C);
2705 } else {
2706 assert(!C.Chain);
2707 C.CCValid = SystemZ::CCMASK_ICMP;
2708 C.Opcode = SystemZISD::ICMP;
2709 // Choose the type of comparison. Equality and inequality tests can
2710 // use either signed or unsigned comparisons. The choice also doesn't
2711 // matter if both sign bits are known to be clear. In those cases we
2712 // want to give the main isel code the freedom to choose whichever
2713 // form fits best.
2714 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2715 C.CCMask == SystemZ::CCMASK_CMP_NE ||
2716 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2717 C.ICmpType = SystemZICMP::Any;
2718 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2719 C.ICmpType = SystemZICMP::UnsignedOnly;
2720 else
2721 C.ICmpType = SystemZICMP::SignedOnly;
2722 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2723 adjustForRedundantAnd(DAG, DL, C);
2724 adjustZeroCmp(DAG, DL, C);
2725 adjustSubwordCmp(DAG, DL, C);
2726 adjustForSubtraction(DAG, DL, C);
2727 adjustForLTGFR(C);
2728 adjustICmpTruncate(DAG, DL, C);
2729 }
2730
2731 if (shouldSwapCmpOperands(C)) {
2732 std::swap(C.Op0, C.Op1);
2733 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2734 }
2735
2736 adjustForTestUnderMask(DAG, DL, C);
2737 return C;
2738}
2739
2740// Emit the comparison instruction described by C.
2741static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2742 if (!C.Op1.getNode()) {
2743 SDNode *Node;
2744 switch (C.Op0.getOpcode()) {
2745 case ISD::INTRINSIC_W_CHAIN:
2746 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2747 return SDValue(Node, 0);
2748 case ISD::INTRINSIC_WO_CHAIN:
2749 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2750 return SDValue(Node, Node->getNumValues() - 1);
2751 default:
2752 llvm_unreachable("Invalid comparison operands")::llvm::llvm_unreachable_internal("Invalid comparison operands"
, "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp", 2752)
;
2753 }
2754 }
2755 if (C.Opcode == SystemZISD::ICMP)
2756 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
2757 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
2758 if (C.Opcode == SystemZISD::TM) {
2759 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2760 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2761 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
2762 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
2763 }
2764 if (C.Chain) {
2765 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
2766 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
2767 }
2768 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
2769}
2770
2771// Implement a 32-bit *MUL_LOHI operation by extending both operands to
2772// 64 bits. Extend is the extension type to use. Store the high part
2773// in Hi and the low part in Lo.
2774static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2775 SDValue Op0, SDValue Op1, SDValue &Hi,
2776 SDValue &Lo) {
2777 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2778 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2779 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2780 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2781 DAG.getConstant(32, DL, MVT::i64));
2782 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2783 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2784}
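
// Illustrative sketch (not part of the original source): the scalar
// analogue of the lowering above for the ZERO_EXTEND case — widen once,
// multiply once, and split the 64-bit product into its two halves.
#include <cassert>
#include <cstdint>

static void mulLoHi32Sketch(uint32_t A, uint32_t B) {
  uint64_t Mul = uint64_t(A) * uint64_t(B);
  uint32_t Hi = uint32_t(Mul >> 32); // ISD::SRL by 32, then TRUNCATE
  uint32_t Lo = uint32_t(Mul);       // plain TRUNCATE
  assert(Lo == A * B);               // low half is the wrapping product
  (void)Hi;
}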
2785
2786// Lower a binary operation that produces two VT results, one in each
2787// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2788// and Opcode performs the GR128 operation. Store the even register result
2789// in Even and the odd register result in Odd.
2790static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2791 unsigned Opcode, SDValue Op0, SDValue Op1,
2792 SDValue &Even, SDValue &Odd) {
2793 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
2794 bool Is32Bit = is32Bit(VT);
2795 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2796 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2797}
2798
2799// Return an i32 value that is 1 if the CC value produced by CCReg is
2800// in the mask CCMask and 0 otherwise. CC is known to have a value
2801// in CCValid, so other values can be ignored.
2802static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
2803 unsigned CCValid, unsigned CCMask) {
2804 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
2805 DAG.getConstant(0, DL, MVT::i32),
2806 DAG.getTargetConstant(CCValid, DL, MVT::i32),
2807 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
2808 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
2809}
2810
2811 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2812// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
2813// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
2814// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
2815// floating-point comparisons.
2816enum class CmpMode { Int, FP, StrictFP, SignalingFP };
2817static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
2818 switch (CC) {
2819 case ISD::SETOEQ:
2820 case ISD::SETEQ:
2821 switch (Mode) {
2822 case CmpMode::Int: return SystemZISD::VICMPE;
2823 case CmpMode::FP: return SystemZISD::VFCMPE;
2824 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
2825 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
2826 }
2827 llvm_unreachable("Bad mode")::llvm::llvm_unreachable_internal("Bad mode", "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 2827)
;
2828
2829 case ISD::SETOGE:
2830 case ISD::SETGE:
2831 switch (Mode) {
2832 case CmpMode::Int: return 0;
2833 case CmpMode::FP: return SystemZISD::VFCMPHE;
2834 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
2835 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
2836 }
2837 llvm_unreachable("Bad mode")::llvm::llvm_unreachable_internal("Bad mode", "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 2837)
;
2838
2839 case ISD::SETOGT:
2840 case ISD::SETGT:
2841 switch (Mode) {
2842 case CmpMode::Int: return SystemZISD::VICMPH;
2843 case CmpMode::FP: return SystemZISD::VFCMPH;
2844 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
2845 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
2846 }
2847 llvm_unreachable("Bad mode")::llvm::llvm_unreachable_internal("Bad mode", "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 2847)
;
2848
2849 case ISD::SETUGT:
2850 switch (Mode) {
2851 case CmpMode::Int: return SystemZISD::VICMPHL;
2852 case CmpMode::FP: return 0;
2853 case CmpMode::StrictFP: return 0;
2854 case CmpMode::SignalingFP: return 0;
2855 }
2856 llvm_unreachable("Bad mode")::llvm::llvm_unreachable_internal("Bad mode", "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 2856)
;
2857
2858 default:
2859 return 0;
2860 }
2861}
2862
2863// Return the SystemZISD vector comparison operation for CC or its inverse,
2864// or 0 if neither can be done directly. Indicate in Invert whether the
2865// result is for the inverse of CC. Mode is as above.
2866static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
2867 bool &Invert) {
2868 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2869 Invert = false;
2870 return Opcode;
2871 }
2872
2873 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
2874 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2875 Invert = true;
2876 return Opcode;
2877 }
2878
2879 return 0;
2880}
2881
2882// Return a v2f64 that contains the extended form of elements Start and Start+1
2883// of v4f32 value Op. If Chain is nonnull, return the strict form.
2884static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2885 SDValue Op, SDValue Chain) {
2886 int Mask[] = { Start, -1, Start + 1, -1 };
2887 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2888 if (Chain) {
2889 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
2890 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
2891 }
2892 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2893}
2894
2895// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2896// producing a result of type VT. If Chain is nonnull, return the strict form.
2897SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2898 const SDLoc &DL, EVT VT,
2899 SDValue CmpOp0,
2900 SDValue CmpOp1,
2901 SDValue Chain) const {
2902 // There is no hardware support for v4f32 (unless we have the vector
2903 // enhancements facility 1), so extend the vector into two v2f64s
2904 // and compare those.
2905 if (CmpOp0.getValueType() == MVT::v4f32 &&
2906 !Subtarget.hasVectorEnhancements1()) {
2907 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
2908 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
2909 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
2910 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
2911 if (Chain) {
2912 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
2913 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
2914 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
2915 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2916 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
2917 H1.getValue(1), L1.getValue(1),
2918 HRes.getValue(1), LRes.getValue(1) };
2919 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
2920 SDValue Ops[2] = { Res, NewChain };
2921 return DAG.getMergeValues(Ops, DL);
2922 }
2923 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
2924 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
2925 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2926 }
2927 if (Chain) {
2928 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
2929 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
2930 }
2931 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
2932}
2933
2934// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
2935// an integer mask of type VT. If Chain is nonnull, we have a strict
2936// floating-point comparison. If in addition IsSignaling is true, we have
2937// a strict signaling floating-point comparison.
2938SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
2939 const SDLoc &DL, EVT VT,
2940 ISD::CondCode CC,
2941 SDValue CmpOp0,
2942 SDValue CmpOp1,
2943 SDValue Chain,
2944 bool IsSignaling) const {
2945 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
2946 assert(!Chain || IsFP);
2947 assert(!IsSignaling || Chain);
2948 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
2949 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
2950 bool Invert = false;
2951 SDValue Cmp;
2952 switch (CC) {
2953 // Handle tests for order using (or (ogt y x) (oge x y)).
2954 case ISD::SETUO:
2955 Invert = true;
2956 LLVM_FALLTHROUGH;
2957 case ISD::SETO: {
2958 assert(IsFP && "Unexpected integer comparison")(static_cast <bool> (IsFP && "Unexpected integer comparison"
) ? void (0) : __assert_fail ("IsFP && \"Unexpected integer comparison\""
, "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp", 2958, __extension__
__PRETTY_FUNCTION__))
;
2959 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2960 DL, VT, CmpOp1, CmpOp0, Chain);
2961 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
2962 DL, VT, CmpOp0, CmpOp1, Chain);
2963 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
2964 if (Chain)
2965 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2966 LT.getValue(1), GE.getValue(1));
2967 break;
2968 }
2969
2970 // Handle <> tests using (or (ogt y x) (ogt x y)).
2971 case ISD::SETUEQ:
2972 Invert = true;
2973 LLVM_FALLTHROUGH;
2974 case ISD::SETONE: {
2975 assert(IsFP && "Unexpected integer comparison")(static_cast <bool> (IsFP && "Unexpected integer comparison"
) ? void (0) : __assert_fail ("IsFP && \"Unexpected integer comparison\""
, "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp", 2975, __extension__
__PRETTY_FUNCTION__))
;
2976 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2977 DL, VT, CmpOp1, CmpOp0, Chain);
2978 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2979 DL, VT, CmpOp0, CmpOp1, Chain);
2980 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
2981 if (Chain)
2982 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2983 LT.getValue(1), GT.getValue(1));
2984 break;
2985 }
2986
2987 // Otherwise a single comparison is enough. It doesn't really
2988 // matter whether we try the inversion or the swap first, since
2989 // there are no cases where both work.
2990 default:
2991 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
2992 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
2993 else {
2994 CC = ISD::getSetCCSwappedOperands(CC);
2995 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
2996 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
2997 else
2998 llvm_unreachable("Unhandled comparison")::llvm::llvm_unreachable_internal("Unhandled comparison", "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 2998)
;
2999 }
3000 if (Chain)
3001 Chain = Cmp.getValue(1);
3002 break;
3003 }
3004 if (Invert) {
3005 SDValue Mask =
3006 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3007 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3008 }
3009 if (Chain && Chain.getNode() != Cmp.getNode()) {
3010 SDValue Ops[2] = { Cmp, Chain };
3011 Cmp = DAG.getMergeValues(Ops, DL);
3012 }
3013 return Cmp;
3014}
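
// Illustrative sketch (not part of the original source): the scalar
// identity behind the SETO expansion above. Per lane,
// ordered(x, y) == (y > x) || (x >= y): both ordered comparisons are
// false exactly when one operand is a NaN, and otherwise trichotomy
// guarantees one of them holds. SETUO is simply the inverted result.
#include <cassert>
#include <cmath>

static void orderedCompareSketch(double X, double Y) {
  bool Ordered = !std::isnan(X) && !std::isnan(Y);
  assert(Ordered == ((Y > X) || (X >= Y)));
}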
3015
3016SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3017 SelectionDAG &DAG) const {
3018 SDValue CmpOp0 = Op.getOperand(0);
3019 SDValue CmpOp1 = Op.getOperand(1);
3020 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3021 SDLoc DL(Op);
3022 EVT VT = Op.getValueType();
3023 if (VT.isVector())
3024 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3025
3026 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3027 SDValue CCReg = emitCmp(DAG, DL, C);
3028 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3029}
3030
3031SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3032 SelectionDAG &DAG,
3033 bool IsSignaling) const {
3034 SDValue Chain = Op.getOperand(0);
3035 SDValue CmpOp0 = Op.getOperand(1);
3036 SDValue CmpOp1 = Op.getOperand(2);
3037 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3038 SDLoc DL(Op);
3039 EVT VT = Op.getNode()->getValueType(0);
3040 if (VT.isVector()) {
3041 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3042 Chain, IsSignaling);
3043 return Res.getValue(Op.getResNo());
3044 }
3045
3046 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3047 SDValue CCReg = emitCmp(DAG, DL, C);
3048 CCReg->setFlags(Op->getFlags());
3049 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3050 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3051 return DAG.getMergeValues(Ops, DL);
3052}
3053
3054SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3055 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3056 SDValue CmpOp0 = Op.getOperand(2);
3057 SDValue CmpOp1 = Op.getOperand(3);
3058 SDValue Dest = Op.getOperand(4);
3059 SDLoc DL(Op);
3060
3061 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3062 SDValue CCReg = emitCmp(DAG, DL, C);
3063 return DAG.getNode(
3064 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3065 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3066 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3067}
3068
3069// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3070// allowing Pos and Neg to be wider than CmpOp.
3071static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3072 return (Neg.getOpcode() == ISD::SUB &&
3073 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3074 cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
3075 Neg.getOperand(1) == Pos &&
3076 (Pos == CmpOp ||
3077 (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3078 Pos.getOperand(0) == CmpOp)));
3079}
3080
3081// Return the absolute or negative absolute of Op; IsNegative decides which.
3082static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3083 bool IsNegative) {
3084 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3085 if (IsNegative)
3086 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3087 DAG.getConstant(0, DL, Op.getValueType()), Op);
3088 return Op;
3089}
3090
3091SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3092 SelectionDAG &DAG) const {
3093 SDValue CmpOp0 = Op.getOperand(0);
3094 SDValue CmpOp1 = Op.getOperand(1);
3095 SDValue TrueOp = Op.getOperand(2);
3096 SDValue FalseOp = Op.getOperand(3);
3097 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3098 SDLoc DL(Op);
3099
3100 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3101
3102 // Check for absolute and negative-absolute selections, including those
3103 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3104 // This check supplements the one in DAGCombiner.
3105 if (C.Opcode == SystemZISD::ICMP &&
3106 C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3107 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3108 C.Op1.getOpcode() == ISD::Constant &&
3109 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
3110 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3111 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3112 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3113 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3114 }
3115
3116 SDValue CCReg = emitCmp(DAG, DL, C);
3117 SDValue Ops[] = {TrueOp, FalseOp,
3118 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3119 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3120
3121 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3122}
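
// Illustrative sketch (not part of the original source): the scalar form
// of the absolute/negative-absolute patterns recognized above. With
// TrueOp == x and FalseOp == 0 - x, an LT-against-zero select yields the
// negative absolute value and a GT select the absolute value, which is
// why the CCMASK_CMP_LT/GT bits pick the IsNegative flag.
#include <cassert>
#include <cstdint>

static void selectAbsSketch(int64_t X) {
  if (X == INT64_MIN)
    return; // |INT64_MIN| is not representable
  int64_t Abs = X < 0 ? -X : X;
  int64_t LTSelect = (X < 0) ? X : -X; // select (x < 0), x, 0 - x
  int64_t GTSelect = (X > 0) ? X : -X; // select (x > 0), x, 0 - x
  assert(LTSelect == -Abs && GTSelect == Abs);
}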
3123
3124SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3125 SelectionDAG &DAG) const {
3126 SDLoc DL(Node);
3127 const GlobalValue *GV = Node->getGlobal();
3128 int64_t Offset = Node->getOffset();
3129 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3130 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3131
3132 SDValue Result;
3133 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3134 if (isInt<32>(Offset)) {
3135 // Assign anchors at 1<<12 byte boundaries.
3136 uint64_t Anchor = Offset & ~uint64_t(0xfff);
3137 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3138 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3139
3140 // The offset can be folded into the address if it is aligned to a
3141 // halfword.
3142 Offset -= Anchor;
3143 if (Offset != 0 && (Offset & 1) == 0) {
3144 SDValue Full =
3145 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3146 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3147 Offset = 0;
3148 }
3149 } else {
3150 // Conservatively load a constant offset greater than 32 bits into a
3151 // register below.
3152 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3153 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3154 }
3155 } else {
3156 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3157 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3158 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3159 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3160 }
3161
3162 // If there was a non-zero offset that we didn't fold, create an explicit
3163 // addition for it.
3164 if (Offset != 0)
3165 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3166 DAG.getConstant(Offset, DL, PtrVT));
3167
3168 return Result;
3169}
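
// Illustrative sketch (not part of the original source): the anchor
// arithmetic above. The symbol address is anchored at a 4096-byte
// boundary; the remainder is always in [0, 0xfff] and can be folded into
// the PC-relative address only when it is halfword-aligned.
#include <cassert>
#include <cstdint>

static void anchorSketch(int64_t Offset) {
  uint64_t Anchor = uint64_t(Offset) & ~uint64_t(0xfff);
  int64_t Rem = Offset - int64_t(Anchor);
  assert(Rem >= 0 && Rem <= 0xfff);
  bool Foldable = Rem != 0 && (Rem & 1) == 0; // halfword-aligned
  (void)Foldable;
}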
3170
3171SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3172 SelectionDAG &DAG,
3173 unsigned Opcode,
3174 SDValue GOTOffset) const {
3175 SDLoc DL(Node);
3176 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3177 SDValue Chain = DAG.getEntryNode();
3178 SDValue Glue;
3179
3180 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3181 CallingConv::GHC)
3182 report_fatal_error("In GHC calling convention TLS is not supported");
3183
3184 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3185 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3186 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3187 Glue = Chain.getValue(1);
3188 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3189 Glue = Chain.getValue(1);
3190
3191 // The first call operand is the chain and the second is the TLS symbol.
3192 SmallVector<SDValue, 8> Ops;
3193 Ops.push_back(Chain);
3194 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3195 Node->getValueType(0),
3196 0, 0));
3197
3198 // Add argument registers to the end of the list so that they are
3199 // known live into the call.
3200 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3201 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3202
3203 // Add a register mask operand representing the call-preserved registers.
3204 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3205 const uint32_t *Mask =
3206 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3207 assert(Mask && "Missing call preserved mask for calling convention")(static_cast <bool> (Mask && "Missing call preserved mask for calling convention"
) ? void (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp", 3207, __extension__
__PRETTY_FUNCTION__))
;
3208 Ops.push_back(DAG.getRegisterMask(Mask));
3209
3210 // Glue the call to the argument copies.
3211 Ops.push_back(Glue);
3212
3213 // Emit the call.
3214 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3215 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3216 Glue = Chain.getValue(1);
3217
3218 // Copy the return value from %r2.
3219 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3220}
3221
3222SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3223 SelectionDAG &DAG) const {
3224 SDValue Chain = DAG.getEntryNode();
3225 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3226
3227 // The high part of the thread pointer is in access register 0.
3228 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3229 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3230
3231 // The low part of the thread pointer is in access register 1.
3232 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3233 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3234
3235 // Merge them into a single 64-bit address.
3236 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3237 DAG.getConstant(32, DL, PtrVT));
3238 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3239}
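
// Illustrative sketch (not part of the original source): the scalar merge
// performed above. The high half must land in bits 63..32 and the low
// half must be zero-extended so the OR cannot see stray high bits.
#include <cassert>
#include <cstdint>

static uint64_t mergeThreadPointerSketch(uint32_t TPHi, uint32_t TPLo) {
  uint64_t Merged = (uint64_t(TPHi) << 32) | uint64_t(TPLo);
  assert(uint32_t(Merged >> 32) == TPHi && uint32_t(Merged) == TPLo);
  return Merged;
}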
3240
3241SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3242 SelectionDAG &DAG) const {
3243 if (DAG.getTarget().useEmulatedTLS())
3244 return LowerToTLSEmulatedModel(Node, DAG);
3245 SDLoc DL(Node);
3246 const GlobalValue *GV = Node->getGlobal();
3247 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3248 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3249
3250 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3251 CallingConv::GHC)
3252 report_fatal_error("In GHC calling convention TLS is not supported");
3253
3254 SDValue TP = lowerThreadPointer(DL, DAG);
3255
3256 // Get the offset of GA from the thread pointer, based on the TLS model.
3257 SDValue Offset;
3258 switch (model) {
3259 case TLSModel::GeneralDynamic: {
3260 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3261 SystemZConstantPoolValue *CPV =
3262 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3263
3264 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3265 Offset = DAG.getLoad(
3266 PtrVT, DL, DAG.getEntryNode(), Offset,
3267 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3268
3269 // Call __tls_get_offset to retrieve the offset.
3270 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3271 break;
3272 }
3273
3274 case TLSModel::LocalDynamic: {
3275 // Load the GOT offset of the module ID.
3276 SystemZConstantPoolValue *CPV =
3277 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3278
3279 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3280 Offset = DAG.getLoad(
3281 PtrVT, DL, DAG.getEntryNode(), Offset,
3282 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3283
3284 // Call __tls_get_offset to retrieve the module base offset.
3285 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3286
3287 // Note: The SystemZLDCleanupPass will remove redundant computations
3288 // of the module base offset. Count total number of local-dynamic
3289 // accesses to trigger execution of that pass.
3290 SystemZMachineFunctionInfo* MFI =
3291 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3292 MFI->incNumLocalDynamicTLSAccesses();
3293
3294 // Add the per-symbol offset.
3295 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3296
3297 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3298 DTPOffset = DAG.getLoad(
3299 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3300 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3301
3302 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3303 break;
3304 }
3305
3306 case TLSModel::InitialExec: {
3307 // Load the offset from the GOT.
3308 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3309 SystemZII::MO_INDNTPOFF);
3310 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3311 Offset =
3312 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3313 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3314 break;
3315 }
3316
3317 case TLSModel::LocalExec: {
3318 // Force the offset into the constant pool and load it from there.
3319 SystemZConstantPoolValue *CPV =
3320 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3321
3322 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3323 Offset = DAG.getLoad(
3324 PtrVT, DL, DAG.getEntryNode(), Offset,
3325 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3326 break;
3327 }
3328 }
3329
3330 // Add the base and offset together.
3331 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3332}
3333
3334SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3335 SelectionDAG &DAG) const {
3336 SDLoc DL(Node);
3337 const BlockAddress *BA = Node->getBlockAddress();
3338 int64_t Offset = Node->getOffset();
3339 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3340
3341 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3342 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3343 return Result;
3344}
3345
3346SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3347 SelectionDAG &DAG) const {
3348 SDLoc DL(JT);
3349 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3350 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3351
3352 // Use LARL to load the address of the table.
3353 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3354}
3355
3356SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3357 SelectionDAG &DAG) const {
3358 SDLoc DL(CP);
3359 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3360
3361 SDValue Result;
3362 if (CP->isMachineConstantPoolEntry())
3363 Result =
3364 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3365 else
3366 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3367 CP->getOffset());
3368
3369 // Use LARL to load the address of the constant pool entry.
3370 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3371}
3372
3373SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3374 SelectionDAG &DAG) const {
3375 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
3376 MachineFunction &MF = DAG.getMachineFunction();
3377 MachineFrameInfo &MFI = MF.getFrameInfo();
3378 MFI.setFrameAddressIsTaken(true);
3379
3380 SDLoc DL(Op);
3381 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3382 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3383
3384 // By definition, the frame address is the address of the back chain. (In
3385 // the case of a packed stack without a backchain, return the address where
3386 // the backchain would have been stored. That slot will either be unused
3387 // space or contain a saved register.)
3388 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3389 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3390
3391 // FIXME The frontend should detect this case.
3392 if (Depth > 0) {
3393 report_fatal_error("Unsupported stack frame traversal count");
3394 }
3395
3396 return BackChain;
3397}
3398
3399SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3400 SelectionDAG &DAG) const {
3401 MachineFunction &MF = DAG.getMachineFunction();
3402 MachineFrameInfo &MFI = MF.getFrameInfo();
3403 MFI.setReturnAddressIsTaken(true);
3404
3405 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3406 return SDValue();
3407
3408 SDLoc DL(Op);
3409 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3410 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3411
3412 // FIXME The frontend should detect this case.
3413 if (Depth > 0) {
3414 report_fatal_error("Unsupported stack frame traversal count");
3415 }
3416
3417 // Return R14D, which has the return address. Mark it an implicit live-in.
3418 Register LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3419 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3420}
3421
3422SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3423 SelectionDAG &DAG) const {
3424 SDLoc DL(Op);
3425 SDValue In = Op.getOperand(0);
3426 EVT InVT = In.getValueType();
3427 EVT ResVT = Op.getValueType();
3428
3429 // Convert loads directly. This is normally done by DAGCombiner,
3430 // but we need this case for bitcasts that are created during lowering
3431 // and which are then lowered themselves.
3432 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3433 if (ISD::isNormalLoad(LoadN)) {
3434 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3435 LoadN->getBasePtr(), LoadN->getMemOperand());
3436 // Update the chain uses.
3437 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3438 return NewLoad;
3439 }
3440
3441 if (InVT == MVT::i32 && ResVT == MVT::f32) {
3442 SDValue In64;
3443 if (Subtarget.hasHighWord()) {
3444 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3445 MVT::i64);
3446 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3447 MVT::i64, SDValue(U64, 0), In);
3448 } else {
3449 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3450 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3451 DAG.getConstant(32, DL, MVT::i64));
3452 }
3453 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3454 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3455 DL, MVT::f32, Out64);
3456 }
3457 if (InVT == MVT::f32 && ResVT == MVT::i32) {
3458 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3459 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3460 MVT::f64, SDValue(U64, 0), In);
3461 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3462 if (Subtarget.hasHighWord())
3463 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3464 MVT::i32, Out64);
3465 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3466 DAG.getConstant(32, DL, MVT::i64));
3467 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3468 }
3469 llvm_unreachable("Unexpected bitcast combination")::llvm::llvm_unreachable_internal("Unexpected bitcast combination"
, "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp", 3469)
;
3470}
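
// Illustrative sketch (not part of the original source): the shift-based
// fallback path above keeps the 32-bit payload in bits 63..32 of the
// 64-bit value (matching subreg_h32), so a shift left by 32 on the way in
// and a shift right by 32 on the way out round-trip the bits exactly.
#include <cassert>
#include <cstdint>

static void highWordRoundTripSketch(uint32_t In) {
  uint64_t In64 = uint64_t(In) << 32;  // place payload in the high word
  uint32_t Out = uint32_t(In64 >> 32); // recover it
  assert(Out == In);
}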
3471
3472SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3473 SelectionDAG &DAG) const {
3474 MachineFunction &MF = DAG.getMachineFunction();
3475 SystemZMachineFunctionInfo *FuncInfo =
3476 MF.getInfo<SystemZMachineFunctionInfo>();
3477 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3478
3479 SDValue Chain = Op.getOperand(0);
3480 SDValue Addr = Op.getOperand(1);
3481 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3482 SDLoc DL(Op);
3483
3484 // The initial values of each field.
3485 const unsigned NumFields = 4;
3486 SDValue Fields[NumFields] = {
3487 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3488 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3489 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3490 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3491 };
3492
3493 // Store each field into its respective slot.
3494 SDValue MemOps[NumFields];
3495 unsigned Offset = 0;
3496 for (unsigned I = 0; I < NumFields; ++I) {
3497 SDValue FieldAddr = Addr;
3498 if (Offset != 0)
3499 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3500 DAG.getIntPtrConstant(Offset, DL));
3501 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3502 MachinePointerInfo(SV, Offset));
3503 Offset += 8;
3504 }
3505 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3506}
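
// Illustrative sketch (not part of the original source): the va_list
// layout implied by the stores above — four 8-byte fields at offsets 0,
// 8, 16 and 24. The field names here are descriptive guesses, not the
// ABI's official ones.
#include <cstdint>

struct VaListSketch {
  uint64_t FirstUnusedGPR; // offset 0:  getVarArgsFirstGPR()
  uint64_t FirstUnusedFPR; // offset 8:  getVarArgsFirstFPR()
  void *OverflowArgArea;   // offset 16: getVarArgsFrameIndex() address
  void *RegSaveArea;       // offset 24: getRegSaveFrameIndex() address
};
static_assert(sizeof(VaListSketch) == 32, "four 8-byte fields");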
3507
3508SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3509 SelectionDAG &DAG) const {
3510 SDValue Chain = Op.getOperand(0);
3511 SDValue DstPtr = Op.getOperand(1);
3512 SDValue SrcPtr = Op.getOperand(2);
3513 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3514 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3515 SDLoc DL(Op);
3516
3517 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
3518 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3519 /*isTailCall*/ false, MachinePointerInfo(DstSV),
3520 MachinePointerInfo(SrcSV));
3521}
3522
3523SDValue SystemZTargetLowering::
3524lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
3525 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3526 MachineFunction &MF = DAG.getMachineFunction();
3527 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3528 bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3529
3530 SDValue Chain = Op.getOperand(0);
3531 SDValue Size = Op.getOperand(1);
3532 SDValue Align = Op.getOperand(2);
3533 SDLoc DL(Op);
3534
3535 // If the user has set the no-realign-stack function attribute, ignore
3536 // alloca alignments.
3537 uint64_t AlignVal =
3538 (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3539
3540 uint64_t StackAlign = TFI->getStackAlignment();
3541 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3542 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3543
3544 Register SPReg = getStackPointerRegisterToSaveRestore();
3545 SDValue NeededSpace = Size;
3546
3547 // Get a reference to the stack pointer.
3548 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3549
3550 // If we need a backchain, save it now.
3551 SDValue Backchain;
3552 if (StoreBackchain)
3553 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
3554 MachinePointerInfo());
3555
3556 // Add extra space for alignment if needed.
3557 if (ExtraAlignSpace)
3558 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3559 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3560
3561 // Get the new stack pointer value.
3562 SDValue NewSP;
3563 if (hasInlineStackProbe(MF)) {
3564 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
3565 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
3566 Chain = NewSP.getValue(1);
3567 }
3568 else {
3569 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
3570 // Copy the new stack pointer back.
3571 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3572 }
3573
3574 // The allocated data lives above the 160 bytes allocated for the standard
3575 // frame, plus any outgoing stack arguments. We don't know how much that
3576 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3577 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3578 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3579
3580 // Dynamically realign if needed.
3581 if (RequiredAlign > StackAlign) {
3582 Result =
3583 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3584 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3585 Result =
3586 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3587 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3588 }
3589
3590 if (StoreBackchain)
3591 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
3592 MachinePointerInfo());
3593
3594 SDValue Ops[2] = { Result, Chain };
3595 return DAG.getMergeValues(Ops, DL);
3596}
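
// Illustrative sketch (not part of the original source): the realignment
// arithmetic above, with example values. Over-allocating by
// (RequiredAlign - StackAlign) guarantees an aligned address exists
// inside the block, and the add-then-mask rounds up to it.
#include <cassert>
#include <cstdint>

static uint64_t realignSketch(uint64_t NewSP) {
  const uint64_t StackAlign = 8, RequiredAlign = 64; // example values
  assert(NewSP % StackAlign == 0); // SP stays ABI-aligned
  uint64_t Extra = RequiredAlign - StackAlign;
  uint64_t Result = (NewSP + Extra) & ~(RequiredAlign - 1);
  assert(Result % RequiredAlign == 0 && Result >= NewSP &&
         Result <= NewSP + Extra);
  return Result;
}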
3597
3598SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3599 SDValue Op, SelectionDAG &DAG) const {
3600 SDLoc DL(Op);
3601
3602 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3603}
3604
3605SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
3606 SelectionDAG &DAG) const {
3607 EVT VT = Op.getValueType();
3608 SDLoc DL(Op);
3609 SDValue Ops[2];
3610 if (is32Bit(VT))
3611 // Just do a normal 64-bit multiplication and extract the results.
3612 // We define this so that it can be used for constant division.
3613 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
3614 Op.getOperand(1), Ops[1], Ops[0]);
3615 else if (Subtarget.hasMiscellaneousExtensions2())
3616 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3617 // the high result in the even register. ISD::SMUL_LOHI is defined to
3618 // return the low half first, so the results are in reverse order.
3619 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
3620 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3621 else {
3622 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3623 //
3624 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3625 //
3626 // but using the fact that the upper halves are either all zeros
3627 // or all ones:
3628 //
3629 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3630 //
3631 // and grouping the right terms together since they are quicker than the
3632 // multiplication:
3633 //
3634 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
3635 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
3636 SDValue LL = Op.getOperand(0);
3637 SDValue RL = Op.getOperand(1);
3638 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
3639 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
3640 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3641 // the high result in the even register. ISD::SMUL_LOHI is defined to
3642 // return the low half first, so the results are in reverse order.
3643 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3644 LL, RL, Ops[1], Ops[0]);
3645 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
3646 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
3647 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
3648 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
3649 }
3650 return DAG.getMergeValues(Ops, DL);
3651}
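
// Illustrative sketch (not part of the original source): the identity
// behind the UMUL_LOHI-based path above, scaled down to a 32x32->64
// multiply so it can be checked against a wider signed product.
#include <cassert>
#include <cstdint>

static void smulLoHiSketch(int32_t A, int32_t B) {
  uint32_t UA = uint32_t(A), UB = uint32_t(B);
  uint32_t UHi = uint32_t((uint64_t(UA) * uint64_t(UB)) >> 32);
  uint32_t LH = uint32_t(A >> 31); // all ones iff A < 0 (like SRA by 63)
  uint32_t RH = uint32_t(B >> 31); // likewise for B
  uint32_t SHi = UHi - ((UA & RH) + (LH & UB)); // subtract the grouped sum
  assert(SHi == uint32_t((int64_t(A) * int64_t(B)) >> 32));
}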
3652
3653SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3654 SelectionDAG &DAG) const {
3655 EVT VT = Op.getValueType();
3656 SDLoc DL(Op);
3657 SDValue Ops[2];
3658 if (is32Bit(VT))
3659 // Just do a normal 64-bit multiplication and extract the results.
3660 // We define this so that it can be used for constant division.
3661 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3662 Op.getOperand(1), Ops[1], Ops[0]);
3663 else
3664 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3665 // the high result in the even register. ISD::UMUL_LOHI is defined to
3666 // return the low half first, so the results are in reverse order.
3667 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3668 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3669 return DAG.getMergeValues(Ops, DL);
3670}
3671
3672SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3673 SelectionDAG &DAG) const {
3674 SDValue Op0 = Op.getOperand(0);
3675 SDValue Op1 = Op.getOperand(1);
3676 EVT VT = Op.getValueType();
3677 SDLoc DL(Op);
3678
3679 // We use DSGF for 32-bit division. This means the first operand must
3680 // always be 64-bit, and the second operand should be 32-bit whenever
3681 // that is possible, to improve performance.
3682 if (is32Bit(VT))
3683 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3684 else if (DAG.ComputeNumSignBits(Op1) > 32)
3685 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3686
3687 // DSG(F) returns the remainder in the even register and the
3688 // quotient in the odd register.
3689 SDValue Ops[2];
3690 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
3691 return DAG.getMergeValues(Ops, DL);
3692}
3693
3694SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3695 SelectionDAG &DAG) const {
3696 EVT VT = Op.getValueType();
3697 SDLoc DL(Op);
3698
3699 // DL(G) returns the remainder in the even register and the
3700 // quotient in the odd register.
3701 SDValue Ops[2];
3702 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
3703 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3704 return DAG.getMergeValues(Ops, DL);
3705}
3706
3707SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3708 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3709
3710 // Get the known-zero masks for each operand.
3711 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
3712 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
3713 DAG.computeKnownBits(Ops[1])};
3714
3715 // See if the upper 32 bits of one operand and the lower 32 bits of the
3716 // other are known zero. They are the low and high operands respectively.
3717 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
3718 Known[1].Zero.getZExtValue() };
3719 unsigned High, Low;
3720 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3721 High = 1, Low = 0;
3722 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3723 High = 0, Low = 1;
3724 else
3725 return Op;
3726
3727 SDValue LowOp = Ops[Low];
3728 SDValue HighOp = Ops[High];
3729
3730 // If the high part is a constant, we're better off using IILH.
3731 if (HighOp.getOpcode() == ISD::Constant)
3732 return Op;
3733
3734 // If the low part is a constant that is outside the range of LHI,
3735 // then we're better off using IILF.
3736 if (LowOp.getOpcode() == ISD::Constant) {
3737 int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3738 if (!isInt<16>(Value))
3739 return Op;
3740 }
3741
3742 // Check whether the high part is an AND that doesn't change the
3743 // high 32 bits and just masks out low bits. We can skip it if so.
3744 if (HighOp.getOpcode() == ISD::AND &&
3745 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3746 SDValue HighOp0 = HighOp.getOperand(0);
3747 uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3748 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3749 HighOp = HighOp0;
3750 }
3751
3752 // Take advantage of the fact that all GR32 operations only change the
3753 // low 32 bits by truncating Low to an i32 and inserting it directly
3754 // using a subreg. The interesting cases are those where the truncation
3755 // can be folded.
3756 SDLoc DL(Op);
3757 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3758 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3759 MVT::i64, HighOp, Low32);
3760}
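
// Illustrative sketch (not part of the original source): the precondition
// checked above. When one operand is known zero in its high 32 bits and
// the other in its low 32 bits, the OR degenerates into inserting the low
// word into the high operand's register, which subreg_l32 expresses.
#include <cassert>
#include <cstdint>

static void orAsInsertSketch(uint64_t HighOp, uint64_t LowOp) {
  assert((HighOp & 0xffffffff) == 0 && (LowOp >> 32) == 0);
  uint64_t Inserted = HighOp | uint32_t(LowOp); // insert low 32 bits
  assert(Inserted == (HighOp | LowOp));
}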
3761
3762// Lower SADDO/SSUBO/UADDO/USUBO nodes.
3763SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
3764 SelectionDAG &DAG) const {
3765 SDNode *N = Op.getNode();
3766 SDValue LHS = N->getOperand(0);
3767 SDValue RHS = N->getOperand(1);
3768 SDLoc DL(N);
3769 unsigned BaseOp = 0;
3770 unsigned CCValid = 0;
3771 unsigned CCMask = 0;
3772
3773 switch (Op.getOpcode()) {
3774 default: llvm_unreachable("Unknown instruction!")::llvm::llvm_unreachable_internal("Unknown instruction!", "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 3774)
;
3775 case ISD::SADDO:
3776 BaseOp = SystemZISD::SADDO;
3777 CCValid = SystemZ::CCMASK_ARITH;
3778 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3779 break;
3780 case ISD::SSUBO:
3781 BaseOp = SystemZISD::SSUBO;
3782 CCValid = SystemZ::CCMASK_ARITH;
3783 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3784 break;
3785 case ISD::UADDO:
3786 BaseOp = SystemZISD::UADDO;
3787 CCValid = SystemZ::CCMASK_LOGICAL;
3788 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3789 break;
3790 case ISD::USUBO:
3791 BaseOp = SystemZISD::USUBO;
3792 CCValid = SystemZ::CCMASK_LOGICAL;
3793 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3794 break;
3795 }
3796
3797 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
3798 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
3799
3800 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3801 if (N->getValueType(1) == MVT::i1)
3802 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3803
3804 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3805}
3806
3807static bool isAddCarryChain(SDValue Carry) {
3808 while (Carry.getOpcode() == ISD::ADDCARRY)
3809 Carry = Carry.getOperand(2);
3810 return Carry.getOpcode() == ISD::UADDO;
3811}
3812
3813static bool isSubBorrowChain(SDValue Carry) {
3814 while (Carry.getOpcode() == ISD::SUBCARRY)
3815 Carry = Carry.getOperand(2);
3816 return Carry.getOpcode() == ISD::USUBO;
3817}
3818
3819// Lower ADDCARRY/SUBCARRY nodes.
3820SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
3821 SelectionDAG &DAG) const {
3822
3823 SDNode *N = Op.getNode();
3824 MVT VT = N->getSimpleValueType(0);
3825
3826 // Let legalize expand this if it isn't a legal type yet.
3827 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3828 return SDValue();
3829
3830 SDValue LHS = N->getOperand(0);
3831 SDValue RHS = N->getOperand(1);
3832 SDValue Carry = Op.getOperand(2);
3833 SDLoc DL(N);
3834 unsigned BaseOp = 0;
3835 unsigned CCValid = 0;
3836 unsigned CCMask = 0;
3837
3838 switch (Op.getOpcode()) {
3839 default: llvm_unreachable("Unknown instruction!")::llvm::llvm_unreachable_internal("Unknown instruction!", "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 3839)
;
3840 case ISD::ADDCARRY:
3841 if (!isAddCarryChain(Carry))
3842 return SDValue();
3843
3844 BaseOp = SystemZISD::ADDCARRY;
3845 CCValid = SystemZ::CCMASK_LOGICAL;
3846 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3847 break;
3848 case ISD::SUBCARRY:
3849 if (!isSubBorrowChain(Carry))
3850 return SDValue();
3851
3852 BaseOp = SystemZISD::SUBCARRY;
3853 CCValid = SystemZ::CCMASK_LOGICAL;
3854 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3855 break;
3856 }
3857
3858 // Set the condition code from the carry flag.
3859 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
3860 DAG.getConstant(CCValid, DL, MVT::i32),
3861 DAG.getConstant(CCMask, DL, MVT::i32));
3862
3863 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3864 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
3865
3866 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3867 if (N->getValueType(1) == MVT::i1)
3868 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3869
3870 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3871}
3872
3873SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
3874 SelectionDAG &DAG) const {
3875 EVT VT = Op.getValueType();
3876 SDLoc DL(Op);
3877 Op = Op.getOperand(0);
3878
3879 // Handle vector types via VPOPCT.
3880 if (VT.isVector()) {
3881 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
3882 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
3883 switch (VT.getScalarSizeInBits()) {
3884 case 8:
3885 break;
3886 case 16: {
3887 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
3888 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
3889 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
3890 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3891 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
3892 break;
3893 }
3894 case 32: {
3895 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
3896 DAG.getConstant(0, DL, MVT::i32));
3897 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3898 break;
3899 }
3900 case 64: {
3901 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
3902 DAG.getConstant(0, DL, MVT::i32));
3903 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
3904 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3905 break;
3906 }
3907 default:
3908 llvm_unreachable("Unexpected type")::llvm::llvm_unreachable_internal("Unexpected type", "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 3908)
;
3909 }
3910 return Op;
3911 }
3912
3913 // Get the known-zero mask for the operand.
3914 KnownBits Known = DAG.computeKnownBits(Op);
3915 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
3916 if (NumSignificantBits == 0)
3917 return DAG.getConstant(0, DL, VT);
3918
3919 // Skip known-zero high parts of the operand.
3920 int64_t OrigBitSize = VT.getSizeInBits();
3921 int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
3922 BitSize = std::min(BitSize, OrigBitSize);
3923
3924 // The POPCNT instruction counts the number of bits in each byte.
3925 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
3926 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
3927 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
3928
3929 // Add up per-byte counts in a binary tree. All bits of Op at
3930 // positions greater than or equal to BitSize remain zero throughout.
3931 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
3932 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
3933 if (BitSize != OrigBitSize)
3934 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
3935 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
3936 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3937 }
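// To illustrate: with BitSize == 32 the loop runs for I == 16 and I == 8.
// The first iteration adds the two low byte counts into the two high bytes
// (Op += Op << 16); the second folds those partial sums into the top byte
// (Op += Op << 8), leaving the total count in bits [24, 32) for the
// extraction below.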
3938
3939 // Extract overall result from high byte.
3940 if (BitSize > 8)
3941 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
3942 DAG.getConstant(BitSize - 8, DL, VT));
3943
3944 return Op;
3945}
3946
3947SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3948 SelectionDAG &DAG) const {
3949 SDLoc DL(Op);
3950 AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
3951 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
3952 SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
3953 cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
3954
3955 // The only fence that needs an instruction is a sequentially-consistent
3956 // cross-thread fence.
3957 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
3958 FenceSSID == SyncScope::System) {
3959 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
3960 Op.getOperand(0)),
3961 0);
3962 }
3963
3964 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3965 return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3966}
3967
3968// Op is an atomic load. Lower it into a normal volatile load.
3969SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
3970 SelectionDAG &DAG) const {
3971 auto *Node = cast<AtomicSDNode>(Op.getNode());
3972 return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
3973 Node->getChain(), Node->getBasePtr(),
3974 Node->getMemoryVT(), Node->getMemOperand());
3975}
3976
3977// Op is an atomic store. Lower it into a normal volatile store.
3978SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
3979 SelectionDAG &DAG) const {
3980 auto *Node = cast<AtomicSDNode>(Op.getNode());
3981 SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
3982 Node->getBasePtr(), Node->getMemoryVT(),
3983 Node->getMemOperand());
3984 // We have to enforce sequential consistency by performing a
3985 // serialization operation after the store.
3986 if (Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)
3987 Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
3988 MVT::Other, Chain), 0);
3989 return Chain;
3990}
3991
3992// Op is an 8-, 16- or 32-bit ATOMIC_LOAD_* operation. Lower the 8- and
3993// 16-bit cases into the fullword ATOMIC_LOADW_* operation given by Opcode.
3994SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
3995 SelectionDAG &DAG,
3996 unsigned Opcode) const {
3997 auto *Node = cast<AtomicSDNode>(Op.getNode());
3998
3999 // 32-bit operations need no code outside the main loop.
4000 EVT NarrowVT = Node->getMemoryVT();
4001 EVT WideVT = MVT::i32;
4002 if (NarrowVT == WideVT)
4003 return Op;
4004
4005 int64_t BitSize = NarrowVT.getSizeInBits();
4006 SDValue ChainIn = Node->getChain();
4007 SDValue Addr = Node->getBasePtr();
4008 SDValue Src2 = Node->getVal();
4009 MachineMemOperand *MMO = Node->getMemOperand();
4010 SDLoc DL(Node);
4011 EVT PtrVT = Addr.getValueType();
4012
4013 // Convert atomic subtracts of constants into additions.
4014 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4015 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4016 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
4017 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
4018 }
4019
4020 // Get the address of the containing word.
4021 SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4022 DAG.getConstant(-4, DL, PtrVT));
4023
4024 // Get the number of bits that the word must be rotated left in order
4025 // to bring the field to the top bits of a GR32.
4026 SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4027 DAG.getConstant(3, DL, PtrVT));
4028 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
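// For example, a halfword at byte offset 2 within its aligned word yields
// BitShift == 16 modulo 32 (only the low five bits matter to a rotate);
// rotating the containing word left by 16 brings that halfword into the
// top 16 bits of the GR32.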
4029
4030 // Get the complementing shift amount, for rotating a field in the top
4031 // bits back to its proper position.
4032 SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4033 DAG.getConstant(0, DL, WideVT), BitShift);
4034
4035 // Extend the source operand to 32 bits and prepare it for the inner loop.
4036 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
4037 // operations require the source to be shifted in advance. (This shift
4038 // can be folded if the source is constant.) For AND and NAND, the lower
4039 // bits must be set, while for other opcodes they should be left clear.
4040 if (Opcode != SystemZISD::ATOMIC_SWAPW)
4041 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
4042 DAG.getConstant(32 - BitSize, DL, WideVT));
4043 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
4044 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
4045 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
4046 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
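// For an 8-bit ATOMIC_LOADW_AND, for instance, Src2 now holds the operand
// in its top byte with the remaining 24 bits set to ones, so the AND
// performed inside the loop leaves the other three bytes of the containing
// word unchanged.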
4047
4048 // Construct the ATOMIC_LOADW_* node.
4049 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
4050 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4051 DAG.getConstant(BitSize, DL, WideVT) };
4052 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4053 NarrowVT, MMO);
4054
4055 // Rotate the result of the final CS so that the field is in the lower
4056 // bits of a GR32, then truncate it.
4057 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
4058 DAG.getConstant(BitSize, DL, WideVT));
4059 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
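// Rotating by BitShift alone would bring the field back to the top of the
// GR32; the extra BitSize rotates it past the top so that it wraps around
// into the low bits, ready for truncation.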
4060
4061 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
4062 return DAG.getMergeValues(RetOps, DL);
4063}
4064
4065// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations
4066// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
4067// operations into additions.
4068SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
4069 SelectionDAG &DAG) const {
4070 auto *Node = cast<AtomicSDNode>(Op.getNode());
4071 EVT MemVT = Node->getMemoryVT();
4072 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
4073 // A full-width operation.
4074 assert(Op.getValueType() == MemVT && "Mismatched VTs");
4075 SDValue Src2 = Node->getVal();
4076 SDValue NegSrc2;
4077 SDLoc DL(Src2);
4078
4079 if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
4080 // Use an addition if the operand is constant and either LAA(G) is
4081 // available or the negative value is in the range of A(G)FHI.
4082 int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
4083 if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
4084 NegSrc2 = DAG.getConstant(Value, DL, MemVT);
4085 } else if (Subtarget.hasInterlockedAccess1())
4086 // Use LAA(G) if available.
4087 NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
4088 Src2);
4089
4090 if (NegSrc2.getNode())
4091 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
4092 Node->getChain(), Node->getBasePtr(), NegSrc2,
4093 Node->getMemOperand());
4094
4095 // Use the node as-is.
4096 return Op;
4097 }
4098
4099 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
4100}
4101
4102// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
4103SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
4104 SelectionDAG &DAG) const {
4105 auto *Node = cast<AtomicSDNode>(Op.getNode());
4106 SDValue ChainIn = Node->getOperand(0);
4107 SDValue Addr = Node->getOperand(1);
4108 SDValue CmpVal = Node->getOperand(2);
4109 SDValue SwapVal = Node->getOperand(3);
4110 MachineMemOperand *MMO = Node->getMemOperand();
4111 SDLoc DL(Node);
4112
4113 // We have native support for 32-bit and 64-bit compare and swap, but we
4114 // still need to expand extracting the "success" result from the CC.
4115 EVT NarrowVT = Node->getMemoryVT();
4116 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4117 if (NarrowVT == WideVT) {
4118 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4119 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
4120 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
4121 DL, Tys, Ops, NarrowVT, MMO);
4122 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4123 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
4124
4125 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4126 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4127 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4128 return SDValue();
4129 }
4130
4131 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4132 // via a fullword ATOMIC_CMP_SWAPW operation.
4133 int64_t BitSize = NarrowVT.getSizeInBits();
4134 EVT PtrVT = Addr.getValueType();
4135
4136 // Get the address of the containing word.
4137 SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4138 DAG.getConstant(-4, DL, PtrVT));
4139
4140 // Get the number of bits that the word must be rotated left in order
4141 // to bring the field to the top bits of a GR32.
4142 SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4143 DAG.getConstant(3, DL, PtrVT));
4144 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4145
4146 // Get the complementing shift amount, for rotating a field in the top
4147 // bits back to its proper position.
4148 SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4149 DAG.getConstant(0, DL, WideVT), BitShift);
4150
4151 // Construct the ATOMIC_CMP_SWAPW node.
4152 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4153 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
4154 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
4155 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
4156 VTList, Ops, NarrowVT, MMO);
4157 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4158 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
4159
4160 // emitAtomicCmpSwapW() will zero extend the result (original value).
4161 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
4162 DAG.getValueType(NarrowVT));
4163 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
4164 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4165 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4166 return SDValue();
4167}
4168
4169MachineMemOperand::Flags
4170SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
4171 // Because of how we convert atomic_load and atomic_store to normal loads and
4172 // stores in the DAG, we need to ensure that the MMOs are marked volatile
4173 // since DAGCombine hasn't been updated to account for atomic, but
4174 // non-volatile loads. (See D57601)
4175 if (auto *SI = dyn_cast<StoreInst>(&I))
4176 if (SI->isAtomic())
4177 return MachineMemOperand::MOVolatile;
4178 if (auto *LI = dyn_cast<LoadInst>(&I))
4179 if (LI->isAtomic())
4180 return MachineMemOperand::MOVolatile;
4181 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
4182 if (AI->isAtomic())
4183 return MachineMemOperand::MOVolatile;
4184 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
4185 if (AI->isAtomic())
4186 return MachineMemOperand::MOVolatile;
4187 return MachineMemOperand::MONone;
4188}
4189
4190SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
4191 SelectionDAG &DAG) const {
4192 MachineFunction &MF = DAG.getMachineFunction();
4193 const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
4194 auto *Regs = Subtarget->getSpecialRegisters();
4195 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4196 report_fatal_error("Variable-sized stack allocations are not supported "
4197 "in GHC calling convention");
4198 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
4199 Regs->getStackPointerRegister(), Op.getValueType());
4200}
4201
4202SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
4203 SelectionDAG &DAG) const {
4204 MachineFunction &MF = DAG.getMachineFunction();
4205 const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
4206 auto *Regs = Subtarget->getSpecialRegisters();
4207 bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
4208
4209 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4210 report_fatal_error("Variable-sized stack allocations are not supported "
4211 "in GHC calling convention");
4212
4213 SDValue Chain = Op.getOperand(0);
4214 SDValue NewSP = Op.getOperand(1);
4215 SDValue Backchain;
4216 SDLoc DL(Op);
4217
4218 if (StoreBackchain) {
4219 SDValue OldSP = DAG.getCopyFromReg(
4220 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
4221 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4222 MachinePointerInfo());
4223 }
4224
4225 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
4226
4227 if (StoreBackchain)
4228 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4229 MachinePointerInfo());
4230
4231 return Chain;
4232}
4233
4234SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
4235 SelectionDAG &DAG) const {
4236 bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
4237 if (!IsData)
4238 // Just preserve the chain.
4239 return Op.getOperand(0);
4240
4241 SDLoc DL(Op);
4242 bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
4243 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
4244 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
4245 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
4246 Op.getOperand(1)};
4247 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
4248 Node->getVTList(), Ops,
4249 Node->getMemoryVT(), Node->getMemOperand());
4250}
4251
4252// Convert condition code in CCReg to an i32 value.
4253static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
4254 SDLoc DL(CCReg);
4255 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
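// IPM deposits the 2-bit condition code at bit position SystemZ::IPM_CC of
// the result, so a logical shift right by that amount leaves the raw CC
// value (0-3) in the two least significant bits.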
4256 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
4257 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
4258}
4259
4260SDValue
4261SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4262 SelectionDAG &DAG) const {
4263 unsigned Opcode, CCValid;
4264 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
4265 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
4266 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
4267 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
4268 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
4269 return SDValue();
4270 }
4271
4272 return SDValue();
4273}
4274
4275SDValue
4276SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4277 SelectionDAG &DAG) const {
4278 unsigned Opcode, CCValid;
4279 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
4280 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
4281 if (Op->getNumValues() == 1)
4282 return getCCResult(DAG, SDValue(Node, 0));
4283 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
4284 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
4285 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
4286 }
4287
4288 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4289 switch (Id) {
4290 case Intrinsic::thread_pointer:
4291 return lowerThreadPointer(SDLoc(Op), DAG);
4292
4293 case Intrinsic::s390_vpdi:
4294 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
4295 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4296
4297 case Intrinsic::s390_vperm:
4298 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
4299 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4300
4301 case Intrinsic::s390_vuphb:
4302 case Intrinsic::s390_vuphh:
4303 case Intrinsic::s390_vuphf:
4304 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
4305 Op.getOperand(1));
4306
4307 case Intrinsic::s390_vuplhb:
4308 case Intrinsic::s390_vuplhh:
4309 case Intrinsic::s390_vuplhf:
4310 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
4311 Op.getOperand(1));
4312
4313 case Intrinsic::s390_vuplb:
4314 case Intrinsic::s390_vuplhw:
4315 case Intrinsic::s390_vuplf:
4316 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
4317 Op.getOperand(1));
4318
4319 case Intrinsic::s390_vupllb:
4320 case Intrinsic::s390_vupllh:
4321 case Intrinsic::s390_vupllf:
4322 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
4323 Op.getOperand(1));
4324
4325 case Intrinsic::s390_vsumb:
4326 case Intrinsic::s390_vsumh:
4327 case Intrinsic::s390_vsumgh:
4328 case Intrinsic::s390_vsumgf:
4329 case Intrinsic::s390_vsumqf:
4330 case Intrinsic::s390_vsumqg:
4331 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
4332 Op.getOperand(1), Op.getOperand(2));
4333 }
4334
4335 return SDValue();
4336}
4337
4338namespace {
4339// Says that SystemZISD operation Opcode can be used to perform the equivalent
4340// of a VPERM with permute vector Bytes. If Opcode takes three operands,
4341// Operand is the constant third operand, otherwise it is the number of
4342// bytes in each element of the result.
4343struct Permute {
4344 unsigned Opcode;
4345 unsigned Operand;
4346 unsigned char Bytes[SystemZ::VectorBytes];
4347};
4348}
4349
4350static const Permute PermuteForms[] = {
4351 // VMRHG
4352 { SystemZISD::MERGE_HIGH, 8,
4353 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4354 // VMRHF
4355 { SystemZISD::MERGE_HIGH, 4,
4356 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4357 // VMRHH
4358 { SystemZISD::MERGE_HIGH, 2,
4359 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4360 // VMRHB
4361 { SystemZISD::MERGE_HIGH, 1,
4362 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4363 // VMRLG
4364 { SystemZISD::MERGE_LOW, 8,
4365 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4366 // VMRLF
4367 { SystemZISD::MERGE_LOW, 4,
4368 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4369 // VMRLH
4370 { SystemZISD::MERGE_LOW, 2,
4371 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4372 // VMRLB
4373 { SystemZISD::MERGE_LOW, 1,
4374 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4375 // VPKG
4376 { SystemZISD::PACK, 4,
4377 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4378 // VPKF
4379 { SystemZISD::PACK, 2,
4380 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4381 // VPKH
4382 { SystemZISD::PACK, 1,
4383 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4384 // VPDI V1, V2, 4 (low half of V1, high half of V2)
4385 { SystemZISD::PERMUTE_DWORDS, 4,
4386 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4387 // VPDI V1, V2, 1 (high half of V1, low half of V2)
4388 { SystemZISD::PERMUTE_DWORDS, 1,
4389 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4390};
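// In the tables above, byte selectors 0-15 refer to bytes of operand 0 and
// 16-31 to bytes of operand 1, using the same encoding as VPERM. The VMRHB
// entry, for example, interleaves the first eight bytes of each operand.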
4391
4392// Called after matching a vector shuffle against a particular pattern.
4393// Both the original shuffle and the pattern have two vector operands.
4394// OpNos[0] is the operand of the original shuffle that should be used for
4395// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4396// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
4397// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4398// for operands 0 and 1 of the pattern.
4399static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
4400 if (OpNos[0] < 0) {
4401 if (OpNos[1] < 0)
4402 return false;
4403 OpNo0 = OpNo1 = OpNos[1];
4404 } else if (OpNos[1] < 0) {
4405 OpNo0 = OpNo1 = OpNos[0];
4406 } else {
4407 OpNo0 = OpNos[0];
4408 OpNo1 = OpNos[1];
4409 }
4410 return true;
4411}
4412
4413// Bytes is a VPERM-like permute vector, except that -1 is used for
4414// undefined bytes. Return true if the VPERM can be implemented using P.
4415// When returning true set OpNo0 to the VPERM operand that should be
4416// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
4417//
4418// For example, if swapping the VPERM operands allows P to match, OpNo0
4419// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
4420// operand, but rewriting it to use two duplicated operands allows it to
4421// match P, then OpNo0 and OpNo1 will be the same.
4422static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
4423 unsigned &OpNo0, unsigned &OpNo1) {
4424 int OpNos[] = { -1, -1 };
4425 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4426 int Elt = Bytes[I];
4427 if (Elt >= 0) {
4428 // Make sure that the two permute vectors use the same suboperand
4429 // byte number. Only the operand numbers (the high bits) are
4430 // allowed to differ.
4431 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
4432 return false;
4433 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
4434 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
4435 // Make sure that the operand mappings are consistent with previous
4436 // elements.
4437 if (OpNos[ModelOpNo] == 1 - RealOpNo)
4438 return false;
4439 OpNos[ModelOpNo] = RealOpNo;
4440 }
4441 }
4442 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4443}
4444
4445// As above, but search for a matching permute.
4446static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
4447 unsigned &OpNo0, unsigned &OpNo1) {
4448 for (auto &P : PermuteForms)
4449 if (matchPermute(Bytes, P, OpNo0, OpNo1))
4450 return &P;
4451 return nullptr;
4452}
4453
4454// Bytes is a VPERM-like permute vector, except that -1 is used for
4455// undefined bytes. This permute is an operand of an outer permute.
4456// See whether redistributing the -1 bytes gives a shuffle that can be
4457// implemented using P. If so, set Transform to a VPERM-like permute vector
4458// that, when applied to the result of P, gives the original permute in Bytes.
4459static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4460 const Permute &P,
4461 SmallVectorImpl<int> &Transform) {
4462 unsigned To = 0;
4463 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
4464 int Elt = Bytes[From];
4465 if (Elt < 0)
4466 // Byte number From of the result is undefined.
4467 Transform[From] = -1;
4468 else {
4469 while (P.Bytes[To] != Elt) {
4470 To += 1;
4471 if (To == SystemZ::VectorBytes)
4472 return false;
4473 }
4474 Transform[From] = To;
4475 }
4476 }
4477 return true;
4478}
4479
4480// As above, but search for a matching permute.
4481static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4482 SmallVectorImpl<int> &Transform) {
4483 for (auto &P : PermuteForms)
4484 if (matchDoublePermute(Bytes, P, Transform))
4485 return &P;
4486 return nullptr;
4487}
4488
4489// Convert the mask of the given shuffle op into a byte-level mask,
4490// as if it had type vNi8.
4491static bool getVPermMask(SDValue ShuffleOp,
4492 SmallVectorImpl<int> &Bytes) {
4493 EVT VT = ShuffleOp.getValueType();
4494 unsigned NumElements = VT.getVectorNumElements();
4495 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4496
4497 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
4498 Bytes.resize(NumElements * BytesPerElement, -1);
4499 for (unsigned I = 0; I < NumElements; ++I) {
4500 int Index = VSN->getMaskElt(I);
4501 if (Index >= 0)
4502 for (unsigned J = 0; J < BytesPerElement; ++J)
4503 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4504 }
4505 return true;
4506 }
4507 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
4508 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
4509 unsigned Index = ShuffleOp.getConstantOperandVal(1);
4510 Bytes.resize(NumElements * BytesPerElement, -1);
4511 for (unsigned I = 0; I < NumElements; ++I)
4512 for (unsigned J = 0; J < BytesPerElement; ++J)
4513 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4514 return true;
4515 }
4516 return false;
4517}
4518
4519// Bytes is a VPERM-like permute vector, except that -1 is used for
4520// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
4521// the result come from a contiguous sequence of bytes from one input.
4522// Set Base to the selector for the first byte if so.
4523static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
4524 unsigned BytesPerElement, int &Base) {
4525 Base = -1;
4526 for (unsigned I = 0; I < BytesPerElement; ++I) {
4527 if (Bytes[Start + I] >= 0) {
4528 unsigned Elem = Bytes[Start + I];
4529 if (Base < 0) {
4530 Base = Elem - I;
4531 // Make sure the bytes would come from one input operand.
4532 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
4533 return false;
4534 } else if (unsigned(Base) != Elem - I)
4535 return false;
4536 }
4537 }
4538 return true;
4539}
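// For example, with BytesPerElement == 4, bytes { 20, 21, 22, 23 } give
// Base == 20 (a contiguous run within operand 1), and { 20, 21, -1, 23 }
// succeeds as well because undefined bytes are skipped. In contrast,
// { 14, 15, 16, 17 } fails the check above (14 % 16 + 4 > 16) since the
// run would straddle the boundary between the two operands.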
4540
4541// Bytes is a VPERM-like permute vector, except that -1 is used for
4542// undefined bytes. Return true if it can be performed using VSLDB.
4543// When returning true, set StartIndex to the shift amount and OpNo0
4544// and OpNo1 to the VPERM operands that should be used as the first
4545// and second shift operand respectively.
4546static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
4547 unsigned &StartIndex, unsigned &OpNo0,
4548 unsigned &OpNo1) {
4549 int OpNos[] = { -1, -1 };
4550 int Shift = -1;
4551 for (unsigned I = 0; I < 16; ++I) {
4552 int Index = Bytes[I];
4553 if (Index >= 0) {
4554 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
4555 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
4556 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
4557 if (Shift < 0)
4558 Shift = ExpectedShift;
4559 else if (Shift != ExpectedShift)
4560 return false;
4561 // Make sure that the operand mappings are consistent with previous
4562 // elements.
4563 if (OpNos[ModelOpNo] == 1 - RealOpNo)
4564 return false;
4565 OpNos[ModelOpNo] = RealOpNo;
4566 }
4567 }
4568 StartIndex = Shift;
4569 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4570}
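// For example, Bytes == { 13, 14, ..., 28 } (a contiguous run starting at
// byte 13 of the concatenated operands) matches with StartIndex == 13,
// OpNo0 == 0 and OpNo1 == 1, i.e. a VSLDB of the two operands with shift
// amount 13.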
4571
4572// Create a node that performs P on operands Op0 and Op1, casting the
4573// operands to the appropriate type. The type of the result is determined by P.
4574static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4575 const Permute &P, SDValue Op0, SDValue Op1) {
4576 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
4577 // elements of a PACK are twice as wide as the outputs.
4578 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
4579 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
4580 P.Operand);
4581 // Cast both operands to the appropriate type.
4582 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
4583 SystemZ::VectorBytes / InBytes);
4584 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
4585 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
4586 SDValue Op;
4587 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
4588 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
4589 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
4590 } else if (P.Opcode == SystemZISD::PACK) {
4591 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
4592 SystemZ::VectorBytes / P.Operand);
4593 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
4594 } else {
4595 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
4596 }
4597 return Op;
4598}
4599
4600static bool isZeroVector(SDValue N) {
4601 if (N->getOpcode() == ISD::BITCAST)
4602 N = N->getOperand(0);
4603 if (N->getOpcode() == ISD::SPLAT_VECTOR)
4604 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
4605 return Op->getZExtValue() == 0;
4606 return ISD::isBuildVectorAllZeros(N.getNode());
4607}
4608
4609// Return the index of the zero/undef vector, or UINT32_MAX if not found.
4610static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
4611 for (unsigned I = 0; I < Num ; I++)
4612 if (isZeroVector(Ops[I]))
4613 return I;
4614 return UINT32_MAX;
4615}
4616
4617// Bytes is a VPERM-like permute vector, except that -1 is used for
4618// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
4619// VSLDB or VPERM.
4620static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4621 SDValue *Ops,
4622 const SmallVectorImpl<int> &Bytes) {
4623 for (unsigned I = 0; I < 2; ++I)
4624 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
4625
4626 // First see whether VSLDB can be used.
4627 unsigned StartIndex, OpNo0, OpNo1;
4628 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
4629 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
4630 Ops[OpNo1],
4631 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
4632
4633 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
4634 // eliminate a zero vector by reusing any zero index in the permute vector.
4635 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
4636 if (ZeroVecIdx != UINT32_MAX) {
4637 bool MaskFirst = true;
4638 int ZeroIdx = -1;
4639 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4640 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
4641 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
4642 if (OpNo == ZeroVecIdx && I == 0) {
4643 // If the first byte is zero, use mask as first operand.
4644 ZeroIdx = 0;
4645 break;
4646 }
4647 if (OpNo != ZeroVecIdx && Byte == 0) {
4648 // If mask contains a zero, use it by placing that vector first.
4649 ZeroIdx = I + SystemZ::VectorBytes;
4650 MaskFirst = false;
4651 break;
4652 }
4653 }
4654 if (ZeroIdx != -1) {
4655 SDValue IndexNodes[SystemZ::VectorBytes];
4656 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4657 if (Bytes[I] >= 0) {
4658 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
4659 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
4660 if (OpNo == ZeroVecIdx)
4661 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
4662 else {
4663 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
4664 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
4665 }
4666 } else
4667 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
4668 }
4669 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
4670 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
4671 if (MaskFirst)
4672 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
4673 Mask);
4674 else
4675 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
4676 Mask);
4677 }
4678 }
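// Note that in both branches above the permute mask itself doubles as one
// of the VPERM data operands: each result byte that must be zero selects a
// mask byte whose value is known to be 0, so the explicit zero vector never
// needs to occupy a register.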
4679
4680 SDValue IndexNodes[SystemZ::VectorBytes];
4681 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4682 if (Bytes[I] >= 0)
4683 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
4684 else
4685 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
4686 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
4687 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
4688 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
4689}
4690
4691namespace {
4692// Describes a general N-operand vector shuffle.
4693struct GeneralShuffle {
4694 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
4695 void addUndef();
4696 bool add(SDValue, unsigned);
4697 SDValue getNode(SelectionDAG &, const SDLoc &);
4698 void tryPrepareForUnpack();
4699 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
4700 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
4701
4702 // The operands of the shuffle.
4703 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
4704
4705 // Index I is -1 if byte I of the result is undefined. Otherwise the
4706 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
4707 // Bytes[I] / SystemZ::VectorBytes.
4708 SmallVector<int, SystemZ::VectorBytes> Bytes;
4709
4710 // The type of the shuffle result.
4711 EVT VT;
4712
4713 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
4714 unsigned UnpackFromEltSize;
4715};
4716}
4717
4718// Add an extra undefined element to the shuffle.
4719void GeneralShuffle::addUndef() {
4720 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4721 for (unsigned I = 0; I < BytesPerElement; ++I)
4722 Bytes.push_back(-1);
4723}
4724
4725// Add an extra element to the shuffle, taking it from element Elem of Op.
4726// A null Op indicates a vector input whose value will be calculated later;
4727// there is at most one such input per shuffle and it always has the same
4728// type as the result. Aborts and returns false if the source vector elements
4729// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
4730// LLVM they become implicitly extended, but this is rare and not optimized.
4731bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
4732 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4733
4734 // The source vector can have wider elements than the result,
4735 // either through an explicit TRUNCATE or because of type legalization.
4736 // We want the least significant part.
4737 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
4738 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
4739
4740 // Return false if the source elements are smaller than their destination
4741 // elements.
4742 if (FromBytesPerElement < BytesPerElement)
4743 return false;
4744
4745 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
4746 (FromBytesPerElement - BytesPerElement));
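// For example, taking element 0 of a v2i64 source for a v4i32 result uses
// its least significant four bytes, i.e. Byte == 0 % 16 + (8 - 4) == 4
// (SystemZ is big-endian, so the low part is the higher-numbered bytes).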
4747
4748 // Look through things like shuffles and bitcasts.
4749 while (Op.getNode()) {
4750 if (Op.getOpcode() == ISD::BITCAST)
4751 Op = Op.getOperand(0);
4752 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
4753 // See whether the bytes we need come from a contiguous part of one
4754 // operand.
4755 SmallVector<int, SystemZ::VectorBytes> OpBytes;
4756 if (!getVPermMask(Op, OpBytes))
4757 break;
4758 int NewByte;
4759 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
4760 break;
4761 if (NewByte < 0) {
4762 addUndef();
4763 return true;
4764 }
4765 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
4766 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
4767 } else if (Op.isUndef()) {
4768 addUndef();
4769 return true;
4770 } else
4771 break;
4772 }
4773
4774 // Make sure that the source of the extraction is in Ops.
4775 unsigned OpNo = 0;
4776 for (; OpNo < Ops.size(); ++OpNo)
4777 if (Ops[OpNo] == Op)
4778 break;
4779 if (OpNo == Ops.size())
4780 Ops.push_back(Op);
4781
4782 // Add the element to Bytes.
4783 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
4784 for (unsigned I = 0; I < BytesPerElement; ++I)
4785 Bytes.push_back(Base + I);
4786
4787 return true;
4788}
4789
4790// Return SDNodes for the completed shuffle.
4791SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
4792 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
4793
4794 if (Ops.size() == 0)
4795 return DAG.getUNDEF(VT);
4796
4797 // Use a single unpack if possible as the last operation.
4798 tryPrepareForUnpack();
4799
4800 // Make sure that there are at least two shuffle operands.
4801 if (Ops.size() == 1)
4802 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
4803
4804 // Create a tree of shuffles, deferring root node until after the loop.
4805 // Try to redistribute the undefined elements of non-root nodes so that
4806 // the non-root shuffles match something like a pack or merge, then adjust
4807 // the parent node's permute vector to compensate for the new order.
4808 // Among other things, this copes with vectors like <2 x i16> that were
4809 // padded with undefined elements during type legalization.
4810 //
4811 // In the best case this redistribution will lead to the whole tree
4812 // using packs and merges. It should rarely be a loss in other cases.
4813 unsigned Stride = 1;
4814 for (; Stride * 2 < Ops.size(); Stride *= 2) {
4815 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
4816 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
4817
4818 // Create a mask for just these two operands.
4819 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
4820 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
4821 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
4822 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
4823 if (OpNo == I)
4824 NewBytes[J] = Byte;
4825 else if (OpNo == I + Stride)
4826 NewBytes[J] = SystemZ::VectorBytes + Byte;
4827 else
4828 NewBytes[J] = -1;
4829 }
4830 // See if it would be better to reorganize NewMask to avoid using VPERM.
4831 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
4832 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
4833 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
4834 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
4835 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
4836 if (NewBytes[J] >= 0) {
4837 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
4838 "Invalid double permute");
4839 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
4840 } else
4841 assert(NewBytesMap[J] < 0 && "Invalid double permute");
4842 }
4843 } else {
4844 // Just use NewBytes on the operands.
4845 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
4846 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
4847 if (NewBytes[J] >= 0)
4848 Bytes[J] = I * SystemZ::VectorBytes + J;
4849 }
4850 }
4851 }
4852
4853 // Now we just have 2 inputs. Put the second operand in Ops[1].
4854 if (Stride > 1) {
4855 Ops[1] = Ops[Stride];
4856 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4857 if (Bytes[I] >= int(SystemZ::VectorBytes))
4858 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
4859 }
4860
4861 // Look for an instruction that can do the permute without resorting
4862 // to VPERM.
4863 unsigned OpNo0, OpNo1;
4864 SDValue Op;
4865 if (unpackWasPrepared() && Ops[1].isUndef())
4866 Op = Ops[0];
4867 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
4868 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
4869 else
4870 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
4871
4872 Op = insertUnpackIfPrepared(DAG, DL, Op);
4873
4874 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4875}
4876
4877#ifndef NDEBUG
4878static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
4879 dbgs() << Msg.c_str() << " { ";
4880 for (unsigned i = 0; i < Bytes.size(); i++)
4881 dbgs() << Bytes[i] << " ";
4882 dbgs() << "}\n";
4883}
4884#endif
4885
4886// If the Bytes vector matches an unpack operation, prepare to do the unpack
4887// after all else by removing the zero vector and the effect of the unpack on
4888// Bytes.
4889void GeneralShuffle::tryPrepareForUnpack() {
4890 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
4891 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
4892 return;
4893
4894 // Only do this if removing the zero vector reduces the depth, otherwise
4895 // the critical path will increase with the final unpack.
4896 if (Ops.size() > 2 &&
4897 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
4898 return;
4899
4900 // Find an unpack that would allow removing the zero vector from Ops.
4901 UnpackFromEltSize = 1;
4902 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
4903 bool MatchUnpack = true;
4904 SmallVector<int, SystemZ::VectorBytes> SrcBytes;
4905 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
4906 unsigned ToEltSize = UnpackFromEltSize * 2;
4907 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
4908 if (!IsZextByte)
4909 SrcBytes.push_back(Bytes[Elt]);
4910 if (Bytes[Elt] != -1) {
4911 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
4912 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
4913 MatchUnpack = false;
4914 break;
4915 }
4916 }
4917 }
4918 if (MatchUnpack) {
4919 if (Ops.size() == 2) {
4920 // Don't use unpack if a single source operand needs rearrangement.
4921 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
4922 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
4923 UnpackFromEltSize = UINT_MAX;
4924 return;
4925 }
4926 }
4927 break;
4928 }
4929 }
4930 if (UnpackFromEltSize > 4)
4931 return;
4932
4933 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
4934 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
4935 << ".\n";
4936 dumpBytes(Bytes, "Original Bytes vector:"););
4937
4938 // Apply the unpack in reverse to the Bytes array.
4939 unsigned B = 0;
4940 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
4941 Elt += UnpackFromEltSize;
4942 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
4943 Bytes[B] = Bytes[Elt];
4944 }
4945 while (B < SystemZ::VectorBytes)
4946 Bytes[B++] = -1;
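// To illustrate: with UnpackFromEltSize == 2, the final unpack will widen
// 2-byte elements to 4 bytes by prepending zero bytes, so applying it in
// reverse keeps only bytes 2-3, 6-7, 10-11 and 14-15 of the old Bytes
// vector, packing them into the first eight entries and marking the rest
// undefined.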
4947
4948 // Remove the zero vector from Ops.
4949 Ops.erase(&Ops[ZeroVecOpNo]);
4950 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4951 if (Bytes[I] >= 0) {
4952 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
4953 if (OpNo > ZeroVecOpNo)
4954 Bytes[I] -= SystemZ::VectorBytes;
4955 }
4956
4957 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
4958 dbgs() << "\n";);
4959}
4960
4961SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
4962 const SDLoc &DL,
4963 SDValue Op) {
4964 if (!unpackWasPrepared())
4965 return Op;
4966 unsigned InBits = UnpackFromEltSize * 8;
4967 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
4968 SystemZ::VectorBits / InBits);
4969 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
4970 unsigned OutBits = InBits * 2;
4971 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
4972 SystemZ::VectorBits / OutBits);
4973 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
4974}
4975
4976// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
4977static bool isScalarToVector(SDValue Op) {
4978 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
4979 if (!Op.getOperand(I).isUndef())
4980 return false;
4981 return true;
4982}
4983
4984// Return a vector of type VT that contains Value in the first element.
4985// The other elements don't matter.
4986static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4987 SDValue Value) {
4988 // If we have a constant, replicate it to all elements and let the
4989 // BUILD_VECTOR lowering take care of it.
4990 if (Value.getOpcode() == ISD::Constant ||
4991 Value.getOpcode() == ISD::ConstantFP) {
4992 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
4993 return DAG.getBuildVector(VT, DL, Ops);
4994 }
4995 if (Value.isUndef())
4996 return DAG.getUNDEF(VT);
4997 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
4998}
4999
5000// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
5001// element 1. Used for cases in which replication is cheap.
5002static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5003 SDValue Op0, SDValue Op1) {
5004 if (Op0.isUndef()) {
5005 if (Op1.isUndef())
5006 return DAG.getUNDEF(VT);
5007 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
5008 }
5009 if (Op1.isUndef())
5010 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
5011 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
5012 buildScalarToVector(DAG, DL, VT, Op0),
5013 buildScalarToVector(DAG, DL, VT, Op1));
5014}
5015
5016// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
5017// vector for them.
5018static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
5019 SDValue Op1) {
5020 if (Op0.isUndef() && Op1.isUndef())
5021 return DAG.getUNDEF(MVT::v2i64);
5022 // If one of the two inputs is undefined then replicate the other one,
5023 // in order to avoid using another register unnecessarily.
5024 if (Op0.isUndef())
5025 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5026 else if (Op1.isUndef())
5027 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5028 else {
5029 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5030 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5031 }
5032 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
5033}
5034
5035// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
5036// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
5037// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
5038// would benefit from this representation and return it if so.
5039static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
5040 BuildVectorSDNode *BVN) {
5041 EVT VT = BVN->getValueType(0);
5042 unsigned NumElements = VT.getVectorNumElements();
5043
5044 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
5045 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
5046 // need a BUILD_VECTOR, add an additional placeholder operand for that
5047 // BUILD_VECTOR and store its operands in ResidueOps.
5048 GeneralShuffle GS(VT);
5049 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
5050 bool FoundOne = false;
5051 for (unsigned I = 0; I < NumElements; ++I) {
5052 SDValue Op = BVN->getOperand(I);
5053 if (Op.getOpcode() == ISD::TRUNCATE)
5054 Op = Op.getOperand(0);
5055 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5056 Op.getOperand(1).getOpcode() == ISD::Constant) {
5057 unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
5058 if (!GS.add(Op.getOperand(0), Elem))
5059 return SDValue();
5060 FoundOne = true;
5061 } else if (Op.isUndef()) {
5062 GS.addUndef();
5063 } else {
5064 if (!GS.add(SDValue(), ResidueOps.size()))
5065 return SDValue();
5066 ResidueOps.push_back(BVN->getOperand(I));
5067 }
5068 }
5069
5070 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
5071 if (!FoundOne)
5072 return SDValue();
5073
5074 // Create the BUILD_VECTOR for the remaining elements, if any.
5075 if (!ResidueOps.empty()) {
5076 while (ResidueOps.size() < NumElements)
5077 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
5078 for (auto &Op : GS.Ops) {
5079 if (!Op.getNode()) {
5080 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
5081 break;
5082 }
5083 }
5084 }
5085 return GS.getNode(DAG, SDLoc(BVN));
5086}
5087
5088bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
5089 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
5090 return true;
5091 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
5092 return true;
5093 return false;
5094}
5095
5096// Combine GPR scalar values Elems into a vector of type VT.
5097SDValue
5098SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5099 SmallVectorImpl<SDValue> &Elems) const {
5100 // See whether there is a single replicated value.
5101 SDValue Single;
5102 unsigned int NumElements = Elems.size();
5103 unsigned int Count = 0;
5104 for (auto Elem : Elems) {
5105 if (!Elem.isUndef()) {
5106 if (!Single.getNode())
5107 Single = Elem;
5108 else if (Elem != Single) {
5109 Single = SDValue();
5110 break;
5111 }
5112 Count += 1;
5113 }
5114 }
5115 // There are three cases here:
5116 //
5117 // - if the only defined element is a loaded one, the best sequence
5118 // is a replicating load.
5119 //
5120 // - otherwise, if the only defined element is an i64 value, we will
5121 // end up with the same VLVGP sequence regardless of whether we short-cut
5122 // for replication or fall through to the later code.
5123 //
5124 // - otherwise, if the only defined element is an i32 or smaller value,
5125 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
5126 // This is only a win if the single defined element is used more than once.
5127 // In other cases we're better off using a single VLVGx.
5128 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
5129 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
5130
5131 // If all elements are loads, use VLREP/VLEs (below).
5132 bool AllLoads = true;
5133 for (auto Elem : Elems)
5134 if (!isVectorElementLoad(Elem)) {
5135 AllLoads = false;
5136 break;
5137 }
5138
5139 // The best way of building a v2i64 from two i64s is to use VLVGP.
5140 if (VT == MVT::v2i64 && !AllLoads)
5141 return joinDwords(DAG, DL, Elems[0], Elems[1]);
5142
5143 // Use a 64-bit merge high to combine two doubles.
5144 if (VT == MVT::v2f64 && !AllLoads)
5145 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5146
5147 // Build v4f32 values directly from the FPRs:
5148 //
5149 //   <Axxx> <Bxxx> <Cxxx> <Dxxx>
5150 //       V      V               VMRHF
5151 //     <ABxx>     <CDxx>
5152 //          V                   VMRHG
5153 //       <ABCD>
5154 if (VT == MVT::v4f32 && !AllLoads) {
5155 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5156 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
5157 // Avoid unnecessary undefs by reusing the other operand.
5158 if (Op01.isUndef())
5159 Op01 = Op23;
5160 else if (Op23.isUndef())
5161 Op23 = Op01;
5162 // Merging identical replications is a no-op.
5163 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
5164 return Op01;
5165 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
5166 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
5167 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
5168 DL, MVT::v2i64, Op01, Op23);
5169 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5170 }
5171
5172 // Collect the constant terms.
5173 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
5174 SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
5175
5176 unsigned NumConstants = 0;
5177 for (unsigned I = 0; I < NumElements; ++I) {
5178 SDValue Elem = Elems[I];
5179 if (Elem.getOpcode() == ISD::Constant ||
5180 Elem.getOpcode() == ISD::ConstantFP) {
5181 NumConstants += 1;
5182 Constants[I] = Elem;
5183 Done[I] = true;
5184 }
5185 }
5186 // If there was at least one constant, fill in the other elements of
5187 // Constants with undefs to get a full vector constant and use that
5188 // as the starting point.
5189 SDValue Result;
5190 SDValue ReplicatedVal;
5191 if (NumConstants > 0) {
5192 for (unsigned I = 0; I < NumElements; ++I)
5193 if (!Constants[I].getNode())
5194 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
5195 Result = DAG.getBuildVector(VT, DL, Constants);
5196 } else {
5197 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
5198 // avoid a false dependency on any previous contents of the vector
5199 // register.
5200
5201 // Use a VLREP if at least one element is a load. Make sure to replicate
5202 // the load with the most elements having its value.
5203 std::map<const SDNode*, unsigned> UseCounts;
5204 SDNode *LoadMaxUses = nullptr;
5205 for (unsigned I = 0; I < NumElements; ++I)
5206 if (isVectorElementLoad(Elems[I])) {
5207 SDNode *Ld = Elems[I].getNode();
5208 UseCounts[Ld]++;
5209 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
5210 LoadMaxUses = Ld;
5211 }
5212 if (LoadMaxUses != nullptr) {
5213 ReplicatedVal = SDValue(LoadMaxUses, 0);
5214 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
5215 } else {
5216 // Try to use VLVGP.
5217 unsigned I1 = NumElements / 2 - 1;
5218 unsigned I2 = NumElements - 1;
5219 bool Def1 = !Elems[I1].isUndef();
5220 bool Def2 = !Elems[I2].isUndef();
5221 if (Def1 || Def2) {
5222 SDValue Elem1 = Elems[Def1 ? I1 : I2];
5223 SDValue Elem2 = Elems[Def2 ? I2 : I1];
5224 Result = DAG.getNode(ISD::BITCAST, DL, VT,
5225 joinDwords(DAG, DL, Elem1, Elem2));
5226 Done[I1] = true;
5227 Done[I2] = true;
5228 } else
5229 Result = DAG.getUNDEF(VT);
5230 }
5231 }
5232
5233 // Use VLVGx to insert the other elements.
5234 for (unsigned I = 0; I < NumElements; ++I)
5235 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
5236 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
5237 DAG.getConstant(I, DL, MVT::i32));
5238 return Result;
5239}
5240
5241SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
5242 SelectionDAG &DAG) const {
5243 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
5244 SDLoc DL(Op);
5245 EVT VT = Op.getValueType();
5246
5247 if (BVN->isConstant()) {
5248 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
5249 return Op;
5250
5251 // Fall back to loading it from memory.
5252 return SDValue();
5253 }
5254
5255 // See if we should use shuffles to construct the vector from other vectors.
5256 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
5257 return Res;
5258
5259 // Detect SCALAR_TO_VECTOR conversions.
5260 if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
5261 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
5262
5263 // Otherwise use buildVector to build the vector up from GPRs.
5264 unsigned NumElements = Op.getNumOperands();
5265 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
5266 for (unsigned I = 0; I < NumElements; ++I)
5267 Ops[I] = Op.getOperand(I);
5268 return buildVector(DAG, DL, VT, Ops);
5269}
5270
5271SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
5272 SelectionDAG &DAG) const {
5273 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
5274 SDLoc DL(Op);
5275 EVT VT = Op.getValueType();
5276 unsigned NumElements = VT.getVectorNumElements();
5277
5278 if (VSN->isSplat()) {
5279 SDValue Op0 = Op.getOperand(0);
5280 unsigned Index = VSN->getSplatIndex();
5281 assert(Index < VT.getVectorNumElements() &&
5282        "Splat index should be defined and in first operand");
5283 // See whether the value we're splatting is directly available as a scalar.
5284 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5285 Op0.getOpcode() == ISD::BUILD_VECTOR)
5286 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
5287 // Otherwise keep it as a vector-to-vector operation.
5288 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
5289 DAG.getTargetConstant(Index, DL, MVT::i32));
5290 }
5291
5292 GeneralShuffle GS(VT);
5293 for (unsigned I = 0; I < NumElements; ++I) {
5294 int Elt = VSN->getMaskElt(I);
5295 if (Elt < 0)
5296 GS.addUndef();
5297 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
5298 unsigned(Elt) % NumElements))
5299 return SDValue();
5300 }
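  // Each mask element selects operand 0 for indices below NumElements and
  // operand 1 otherwise, hence the divide and modulo above.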
5301 return GS.getNode(DAG, SDLoc(VSN));
5302}
5303
5304SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
5305 SelectionDAG &DAG) const {
5306 SDLoc DL(Op);
5307 // Just insert the scalar into element 0 of an undefined vector.
5308 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
5309 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
5310 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
5311}
5312
5313SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
5314 SelectionDAG &DAG) const {
5315 // Handle insertions of floating-point values.
5316 SDLoc DL(Op);
5317 SDValue Op0 = Op.getOperand(0);
5318 SDValue Op1 = Op.getOperand(1);
5319 SDValue Op2 = Op.getOperand(2);
5320 EVT VT = Op.getValueType();
5321
5322 // Insertions into constant indices of a v2f64 can be done using VPDI.
5323 // However, if the inserted value is a bitcast or a constant then it's
5324 // better to use GPRs, as below.
5325 if (VT == MVT::v2f64 &&
5326 Op1.getOpcode() != ISD::BITCAST &&
5327 Op1.getOpcode() != ISD::ConstantFP &&
5328 Op2.getOpcode() == ISD::Constant) {
5329 uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
5330 unsigned Mask = VT.getVectorNumElements() - 1;
5331 if (Index <= Mask)
5332 return Op;
5333 }
5334
5335 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
5336 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
5337 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
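  // (For example, a v2f64 insertion with a variable index falls through to
  // here and is performed as a v2i64 insertion on the bitcast operands.)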
5338 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
5339 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
5340 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
5341 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5342}
5343
5344SDValue
5345SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
5346 SelectionDAG &DAG) const {
5347 // Handle extractions of floating-point values.
5348 SDLoc DL(Op);
5349 SDValue Op0 = Op.getOperand(0);
5350 SDValue Op1 = Op.getOperand(1);
5351 EVT VT = Op.getValueType();
5352 EVT VecVT = Op0.getValueType();
5353
5354 // Extractions of constant indices can be done directly.
5355 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
5356 uint64_t Index = CIndexN->getZExtValue();
5357 unsigned Mask = VecVT.getVectorNumElements() - 1;
5358 if (Index <= Mask)
5359 return Op;
5360 }
5361
5362 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
5363 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
5364 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
5365 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
5366 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
5367 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5368}
5369
5370SDValue SystemZTargetLowering::
5371lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5372 SDValue PackedOp = Op.getOperand(0);
5373 EVT OutVT = Op.getValueType();
5374 EVT InVT = PackedOp.getValueType();
5375 unsigned ToBits = OutVT.getScalarSizeInBits();
5376 unsigned FromBits = InVT.getScalarSizeInBits();
5377 do {
5378 FromBits *= 2;
5379 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
5380 SystemZ::VectorBits / FromBits);
5381 PackedOp =
5382 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
5383 } while (FromBits != ToBits);
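  // For example, extending i8 elements to i32 issues two UNPACK_HIGHs,
  // doubling the element width from 8 to 16 and then from 16 to 32 bits.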
5384 return PackedOp;
5385}
5386
5387// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
5388SDValue SystemZTargetLowering::
5389lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5390 SDValue PackedOp = Op.getOperand(0);
5391 SDLoc DL(Op);
5392 EVT OutVT = Op.getValueType();
5393 EVT InVT = PackedOp.getValueType();
5394 unsigned InNumElts = InVT.getVectorNumElements();
5395 unsigned OutNumElts = OutVT.getVectorNumElements();
5396 unsigned NumInPerOut = InNumElts / OutNumElts;
5397
5398 SDValue ZeroVec =
5399 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
5400
5401 SmallVector<int, 16> Mask(InNumElts);
5402 unsigned ZeroVecElt = InNumElts;
5403 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
5404 unsigned MaskElt = PackedElt * NumInPerOut;
5405 unsigned End = MaskElt + NumInPerOut - 1;
5406 for (; MaskElt < End; MaskElt++)
5407 Mask[MaskElt] = ZeroVecElt++;
5408 Mask[MaskElt] = PackedElt;
5409 }
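  // For example, zero-extending the low four bytes of a v16i8 to v4i32
  // (NumInPerOut = 4) produces the big-endian mask
  // <16,17,18,0, 19,20,21,1, 22,23,24,2, 25,26,27,3>,
  // i.e. three bytes of the zero vector followed by each packed byte.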
5410 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
5411 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
5412}
5413
5414SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
5415 unsigned ByScalar) const {
5416 // Look for cases where a vector shift can use the *_BY_SCALAR form.
5417 SDValue Op0 = Op.getOperand(0);
5418 SDValue Op1 = Op.getOperand(1);
5419 SDLoc DL(Op);
5420 EVT VT = Op.getValueType();
5421 unsigned ElemBitSize = VT.getScalarSizeInBits();
5422
5423 // See whether the shift vector is a splat represented as BUILD_VECTOR.
5424 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
5425 APInt SplatBits, SplatUndef;
5426 unsigned SplatBitSize;
5427 bool HasAnyUndefs;
5428 // Check for constant splats. Use ElemBitSize as the minimum element
5429 // width and reject splats that need wider elements.
5430 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
5431 ElemBitSize, true) &&
5432 SplatBitSize == ElemBitSize) {
5433 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
5434 DL, MVT::i32);
5435 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5436 }
5437 // Check for variable splats.
5438 BitVector UndefElements;
5439 SDValue Splat = BVN->getSplatValue(&UndefElements);
5440 if (Splat) {
5441 // Since i32 is the smallest legal type, we either need a no-op
5442 // or a truncation.
5443 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
5444 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5445 }
5446 }
5447
5448 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
5449 // and the shift amount is directly available in a GPR.
5450 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
5451 if (VSN->isSplat()) {
5452 SDValue VSNOp0 = VSN->getOperand(0);
5453 unsigned Index = VSN->getSplatIndex();
5454 assert(Index < VT.getVectorNumElements() &&
5455        "Splat index should be defined and in first operand");
5456 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5457 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
5458 // Since i32 is the smallest legal type, we either need a no-op
5459 // or a truncation.
5460 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
5461 VSNOp0.getOperand(Index));
5462 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5463 }
5464 }
5465 }
5466
5467 // Otherwise just treat the current form as legal.
5468 return Op;
5469}
5470
5471SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
5472 SelectionDAG &DAG) const {
5473 switch (Op.getOpcode()) {
5474 case ISD::FRAMEADDR:
5475 return lowerFRAMEADDR(Op, DAG);
5476 case ISD::RETURNADDR:
5477 return lowerRETURNADDR(Op, DAG);
5478 case ISD::BR_CC:
5479 return lowerBR_CC(Op, DAG);
5480 case ISD::SELECT_CC:
5481 return lowerSELECT_CC(Op, DAG);
5482 case ISD::SETCC:
5483 return lowerSETCC(Op, DAG);
5484 case ISD::STRICT_FSETCC:
5485 return lowerSTRICT_FSETCC(Op, DAG, false);
5486 case ISD::STRICT_FSETCCS:
5487 return lowerSTRICT_FSETCC(Op, DAG, true);
5488 case ISD::GlobalAddress:
5489 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
5490 case ISD::GlobalTLSAddress:
5491 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
5492 case ISD::BlockAddress:
5493 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
5494 case ISD::JumpTable:
5495 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
5496 case ISD::ConstantPool:
5497 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
5498 case ISD::BITCAST:
5499 return lowerBITCAST(Op, DAG);
5500 case ISD::VASTART:
5501 return lowerVASTART(Op, DAG);
5502 case ISD::VACOPY:
5503 return lowerVACOPY(Op, DAG);
5504 case ISD::DYNAMIC_STACKALLOC:
5505 return lowerDYNAMIC_STACKALLOC(Op, DAG);
5506 case ISD::GET_DYNAMIC_AREA_OFFSET:
5507 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
5508 case ISD::SMUL_LOHI:
5509 return lowerSMUL_LOHI(Op, DAG);
5510 case ISD::UMUL_LOHI:
5511 return lowerUMUL_LOHI(Op, DAG);
5512 case ISD::SDIVREM:
5513 return lowerSDIVREM(Op, DAG);
5514 case ISD::UDIVREM:
5515 return lowerUDIVREM(Op, DAG);
5516 case ISD::SADDO:
5517 case ISD::SSUBO:
5518 case ISD::UADDO:
5519 case ISD::USUBO:
5520 return lowerXALUO(Op, DAG);
5521 case ISD::ADDCARRY:
5522 case ISD::SUBCARRY:
5523 return lowerADDSUBCARRY(Op, DAG);
5524 case ISD::OR:
5525 return lowerOR(Op, DAG);
5526 case ISD::CTPOP:
5527 return lowerCTPOP(Op, DAG);
5528 case ISD::ATOMIC_FENCE:
5529 return lowerATOMIC_FENCE(Op, DAG);
5530 case ISD::ATOMIC_SWAP:
5531 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
5532 case ISD::ATOMIC_STORE:
5533 return lowerATOMIC_STORE(Op, DAG);
5534 case ISD::ATOMIC_LOAD:
5535 return lowerATOMIC_LOAD(Op, DAG);
5536 case ISD::ATOMIC_LOAD_ADD:
5537 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
5538 case ISD::ATOMIC_LOAD_SUB:
5539 return lowerATOMIC_LOAD_SUB(Op, DAG);
5540 case ISD::ATOMIC_LOAD_AND:
5541 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
5542 case ISD::ATOMIC_LOAD_OR:
5543 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
5544 case ISD::ATOMIC_LOAD_XOR:
5545 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
5546 case ISD::ATOMIC_LOAD_NAND:
5547 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
5548 case ISD::ATOMIC_LOAD_MIN:
5549 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
5550 case ISD::ATOMIC_LOAD_MAX:
5551 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
5552 case ISD::ATOMIC_LOAD_UMIN:
5553 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
5554 case ISD::ATOMIC_LOAD_UMAX:
5555 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
5556 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
5557 return lowerATOMIC_CMP_SWAP(Op, DAG);
5558 case ISD::STACKSAVE:
5559 return lowerSTACKSAVE(Op, DAG);
5560 case ISD::STACKRESTORE:
5561 return lowerSTACKRESTORE(Op, DAG);
5562 case ISD::PREFETCH:
5563 return lowerPREFETCH(Op, DAG);
5564 case ISD::INTRINSIC_W_CHAIN:
5565 return lowerINTRINSIC_W_CHAIN(Op, DAG);
5566 case ISD::INTRINSIC_WO_CHAIN:
5567 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
5568 case ISD::BUILD_VECTOR:
5569 return lowerBUILD_VECTOR(Op, DAG);
5570 case ISD::VECTOR_SHUFFLE:
5571 return lowerVECTOR_SHUFFLE(Op, DAG);
5572 case ISD::SCALAR_TO_VECTOR:
5573 return lowerSCALAR_TO_VECTOR(Op, DAG);
5574 case ISD::INSERT_VECTOR_ELT:
5575 return lowerINSERT_VECTOR_ELT(Op, DAG);
5576 case ISD::EXTRACT_VECTOR_ELT:
5577 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
5578 case ISD::SIGN_EXTEND_VECTOR_INREG:
5579 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
5580 case ISD::ZERO_EXTEND_VECTOR_INREG:
5581 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
5582 case ISD::SHL:
5583 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
5584 case ISD::SRL:
5585 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
5586 case ISD::SRA:
5587 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
5588 default:
5589 llvm_unreachable("Unexpected node to lower");
5590 }
5591}
5592
5593// Lower operations with invalid operand or result types (currently used
5594// only for 128-bit integer types).
5595void
5596SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
5597 SmallVectorImpl<SDValue> &Results,
5598 SelectionDAG &DAG) const {
5599 switch (N->getOpcode()) {
5600 case ISD::ATOMIC_LOAD: {
5601 SDLoc DL(N);
5602 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
5603 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
5604 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5605 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
5606 DL, Tys, Ops, MVT::i128, MMO);
5607 Results.push_back(lowerGR128ToI128(DAG, Res));
5608 Results.push_back(Res.getValue(1));
5609 break;
5610 }
5611 case ISD::ATOMIC_STORE: {
5612 SDLoc DL(N);
5613 SDVTList Tys = DAG.getVTList(MVT::Other);
5614 SDValue Ops[] = { N->getOperand(0),
5615 lowerI128ToGR128(DAG, N->getOperand(2)),
5616 N->getOperand(1) };
5617 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5618 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
5619 DL, Tys, Ops, MVT::i128, MMO);
5620 // We have to enforce sequential consistency by performing a
5621 // serialization operation after the store.
5622 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
5623 AtomicOrdering::SequentiallyConsistent)
5624 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
5625 MVT::Other, Res), 0);
5626 Results.push_back(Res);
5627 break;
5628 }
5629 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
5630 SDLoc DL(N);
5631 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
5632 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
5633 lowerI128ToGR128(DAG, N->getOperand(2)),
5634 lowerI128ToGR128(DAG, N->getOperand(3)) };
5635 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5636 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
5637 DL, Tys, Ops, MVT::i128, MMO);
5638 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
5639 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
5640 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
5641 Results.push_back(lowerGR128ToI128(DAG, Res));
5642 Results.push_back(Success);
5643 Results.push_back(Res.getValue(2));
5644 break;
5645 }
5646 case ISD::BITCAST: {
5647 SDValue Src = N->getOperand(0);
5648 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
5649 !useSoftFloat()) {
5650 SDLoc DL(N);
5651 SDValue Lo, Hi;
5652 if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) {
5653 SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src);
5654 Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
5655 DAG.getConstant(1, DL, MVT::i32));
5656 Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
5657 DAG.getConstant(0, DL, MVT::i32));
5658 } else {
5659 assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass &&
5660        "Unrecognized register class for f128.");
5661 SDValue LoFP = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
5662 DL, MVT::f64, Src);
5663 SDValue HiFP = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
5664 DL, MVT::f64, Src);
5665 Lo = DAG.getNode(ISD::BITCAST, DL, MVT::i64, LoFP);
5666 Hi = DAG.getNode(ISD::BITCAST, DL, MVT::i64, HiFP);
5667 }
5668 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi));
5669 }
5670 break;
5671 }
5672 default:
5673 llvm_unreachable("Unexpected node to lower");
5674 }
5675}
5676
5677void
5678SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
5679 SmallVectorImpl<SDValue> &Results,
5680 SelectionDAG &DAG) const {
5681 return LowerOperationWrapper(N, Results, DAG);
5682}
5683
5684const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
5685#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
5686 switch ((SystemZISD::NodeType)Opcode) {
5687 case SystemZISD::FIRST_NUMBER: break;
5688 OPCODE(RET_FLAG);
5689 OPCODE(CALL);
5690 OPCODE(SIBCALL);
5691 OPCODE(TLS_GDCALL);
5692 OPCODE(TLS_LDCALL);
5693 OPCODE(PCREL_WRAPPER);
5694 OPCODE(PCREL_OFFSET);
5695 OPCODE(ICMP);
5696 OPCODE(FCMP);
5697 OPCODE(STRICT_FCMP);
5698 OPCODE(STRICT_FCMPS);
5699 OPCODE(TM);
5700 OPCODE(BR_CCMASK);
5701 OPCODE(SELECT_CCMASK);
5702 OPCODE(ADJDYNALLOC);
5703 OPCODE(PROBED_ALLOCA);
5704 OPCODE(POPCNT);
5705 OPCODE(SMUL_LOHI);
5706 OPCODE(UMUL_LOHI);
5707 OPCODE(SDIVREM);
5708 OPCODE(UDIVREM);
5709 OPCODE(SADDO);
5710 OPCODE(SSUBO);
5711 OPCODE(UADDO);
5712 OPCODE(USUBO);
5713 OPCODE(ADDCARRY);
5714 OPCODE(SUBCARRY);
5715 OPCODE(GET_CCMASK);
5716 OPCODE(MVC);
5717 OPCODE(NC);
5718 OPCODE(OC);
5719 OPCODE(XC);
5720 OPCODE(CLC);
5721 OPCODE(MEMSET_MVC);
5722 OPCODE(STPCPY);
5723 OPCODE(STRCMP);
5724 OPCODE(SEARCH_STRING);
5725 OPCODE(IPM);
5726 OPCODE(MEMBARRIER);
5727 OPCODE(TBEGIN);
5728 OPCODE(TBEGIN_NOFLOAT);
5729 OPCODE(TEND);
5730 OPCODE(BYTE_MASK);
5731 OPCODE(ROTATE_MASK);
5732 OPCODE(REPLICATE);
5733 OPCODE(JOIN_DWORDS);
5734 OPCODE(SPLAT);
5735 OPCODE(MERGE_HIGH);
5736 OPCODE(MERGE_LOW);
5737 OPCODE(SHL_DOUBLE);
5738 OPCODE(PERMUTE_DWORDS);
5739 OPCODE(PERMUTE);
5740 OPCODE(PACK);
5741 OPCODE(PACKS_CC);
5742 OPCODE(PACKLS_CC);
5743 OPCODE(UNPACK_HIGH);
5744 OPCODE(UNPACKL_HIGH);
5745 OPCODE(UNPACK_LOW);
5746 OPCODE(UNPACKL_LOW);
5747 OPCODE(VSHL_BY_SCALAR);
5748 OPCODE(VSRL_BY_SCALAR);
5749 OPCODE(VSRA_BY_SCALAR);
5750 OPCODE(VSUM);
5751 OPCODE(VICMPE);
5752 OPCODE(VICMPH);
5753 OPCODE(VICMPHL);
5754 OPCODE(VICMPES);
5755 OPCODE(VICMPHS);
5756 OPCODE(VICMPHLS);
5757 OPCODE(VFCMPE);
5758 OPCODE(STRICT_VFCMPE);
5759 OPCODE(STRICT_VFCMPES);
5760 OPCODE(VFCMPH);
5761 OPCODE(STRICT_VFCMPH);
5762 OPCODE(STRICT_VFCMPHS);
5763 OPCODE(VFCMPHE);
5764 OPCODE(STRICT_VFCMPHE);
5765 OPCODE(STRICT_VFCMPHES);
5766 OPCODE(VFCMPES);
5767 OPCODE(VFCMPHS);
5768 OPCODE(VFCMPHES);
5769 OPCODE(VFTCI);
5770 OPCODE(VEXTEND);
5771 OPCODE(STRICT_VEXTEND);
5772 OPCODE(VROUND);
5773 OPCODE(STRICT_VROUND);
5774 OPCODE(VTM);
5775 OPCODE(VFAE_CC);
5776 OPCODE(VFAEZ_CC);
5777 OPCODE(VFEE_CC);
5778 OPCODE(VFEEZ_CC);
5779 OPCODE(VFENE_CC);
5780 OPCODE(VFENEZ_CC);
5781 OPCODE(VISTR_CC);
5782 OPCODE(VSTRC_CC);
5783 OPCODE(VSTRCZ_CC);
5784 OPCODE(VSTRS_CC);
5785 OPCODE(VSTRSZ_CC);
5786 OPCODE(TDC);
5787 OPCODE(ATOMIC_SWAPW);
5788 OPCODE(ATOMIC_LOADW_ADD);
5789 OPCODE(ATOMIC_LOADW_SUB);
5790 OPCODE(ATOMIC_LOADW_AND);
5791 OPCODE(ATOMIC_LOADW_OR);
5792 OPCODE(ATOMIC_LOADW_XOR);
5793 OPCODE(ATOMIC_LOADW_NAND);
5794 OPCODE(ATOMIC_LOADW_MIN);
5795 OPCODE(ATOMIC_LOADW_MAX);
5796 OPCODE(ATOMIC_LOADW_UMIN);
5797 OPCODE(ATOMIC_LOADW_UMAX);
5798 OPCODE(ATOMIC_CMP_SWAPW);
5799 OPCODE(ATOMIC_CMP_SWAP);
5800 OPCODE(ATOMIC_LOAD_128);
5801 OPCODE(ATOMIC_STORE_128);
5802 OPCODE(ATOMIC_CMP_SWAP_128);
5803 OPCODE(LRV);
5804 OPCODE(STRV);
5805 OPCODE(VLER);
5806 OPCODE(VSTER);
5807 OPCODE(PREFETCH);
5808 }
5809 return nullptr;
5810#undef OPCODE
5811}
5812
5813// Return true if VT is a vector whose elements are a whole number of bytes
5814// in width. Also check for presence of vector support.
5815bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
5816 if (!Subtarget.hasVector())
5817 return false;
5818
5819 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
5820}
5821
5822// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
5823// producing a result of type ResVT. Op is a possibly bitcast version
5824// of the input vector and Index is the index (based on type VecVT) that
5825// should be extracted. Return the new extraction if a simplification
5826// was possible or if Force is true.
5827SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
5828 EVT VecVT, SDValue Op,
5829 unsigned Index,
5830 DAGCombinerInfo &DCI,
5831 bool Force) const {
5832 SelectionDAG &DAG = DCI.DAG;
5833
5834 // The number of bytes being extracted.
5835 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
5836
5837 for (;;) {
5838 unsigned Opcode = Op.getOpcode();
5839 if (Opcode == ISD::BITCAST)
5840 // Look through bitcasts.
5841 Op = Op.getOperand(0);
5842 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
5843 canTreatAsByteVector(Op.getValueType())) {
5844 // Get a VPERM-like permute mask and see whether the bytes covered
5845 // by the extracted element are a contiguous sequence from one
5846 // source operand.
5847 SmallVector<int, SystemZ::VectorBytes> Bytes;
5848 if (!getVPermMask(Op, Bytes))
5849 break;
5850 int First;
5851 if (!getShuffleInput(Bytes, Index * BytesPerElement,
5852 BytesPerElement, First))
5853 break;
5854 if (First < 0)
5855 return DAG.getUNDEF(ResVT);
5856 // Make sure the contiguous sequence starts at a multiple of the
5857 // original element size.
5858 unsigned Byte = unsigned(First) % Bytes.size();
5859 if (Byte % BytesPerElement != 0)
5860 break;
5861 // We can get the extracted value directly from an input.
5862 Index = Byte / BytesPerElement;
5863 Op = Op.getOperand(unsigned(First) / Bytes.size());
5864 Force = true;
5865 } else if (Opcode == ISD::BUILD_VECTOR &&
5866 canTreatAsByteVector(Op.getValueType())) {
5867 // We can only optimize this case if the BUILD_VECTOR elements are
5868 // at least as wide as the extracted value.
5869 EVT OpVT = Op.getValueType();
5870 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
5871 if (OpBytesPerElement < BytesPerElement)
5872 break;
5873 // Make sure that the least-significant bit of the extracted value
5874 // is the least-significant bit of an input.
5875 unsigned End = (Index + 1) * BytesPerElement;
5876 if (End % OpBytesPerElement != 0)
5877 break;
5878 // We're extracting the low part of one operand of the BUILD_VECTOR.
5879 Op = Op.getOperand(End / OpBytesPerElement - 1);
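      // (e.g. extracting i16 element 1 from a v4i32 BUILD_VECTOR gives
      // End = 4, so the value is the low half of operand 4/4 - 1 = 0.)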
5880 if (!Op.getValueType().isInteger()) {
5881 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
5882 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
5883 DCI.AddToWorklist(Op.getNode());
5884 }
5885 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
5886 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
5887 if (VT != ResVT) {
5888 DCI.AddToWorklist(Op.getNode());
5889 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
5890 }
5891 return Op;
5892 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
5893 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
5894 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5895 canTreatAsByteVector(Op.getValueType()) &&
5896 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
5897 // Make sure that only the unextended bits are significant.
5898 EVT ExtVT = Op.getValueType();
5899 EVT OpVT = Op.getOperand(0).getValueType();
5900 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
5901 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
5902 unsigned Byte = Index * BytesPerElement;
5903 unsigned SubByte = Byte % ExtBytesPerElement;
5904 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
5905 if (SubByte < MinSubByte ||
5906 SubByte + BytesPerElement > ExtBytesPerElement)
5907 break;
5908 // Get the byte offset of the unextended element
5909 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
5910 // ...then add the byte offset relative to that element.
5911 Byte += SubByte - MinSubByte;
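      // (e.g. a single-byte extract at Index 3 from a v8i16 produced by
      // sign-extending a v16i8: SubByte = 1 = MinSubByte, so Byte becomes
      // 3/2*1 + 0 = 1, the corresponding byte of the unextended vector.)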
5912 if (Byte % BytesPerElement != 0)
5913 break;
5914 Op = Op.getOperand(0);
5915 Index = Byte / BytesPerElement;
5916 Force = true;
5917 } else
5918 break;
5919 }
5920 if (Force) {
5921 if (Op.getValueType() != VecVT) {
5922 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
5923 DCI.AddToWorklist(Op.getNode());
5924 }
5925 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
5926 DAG.getConstant(Index, DL, MVT::i32));
5927 }
5928 return SDValue();
5929}
5930
5931// Optimize vector operations in scalar value Op on the basis that Op
5932// is truncated to TruncVT.
5933SDValue SystemZTargetLowering::combineTruncateExtract(
5934 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
5935 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
5936 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
5937 // of type TruncVT.
5938 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5939 TruncVT.getSizeInBits() % 8 == 0) {
5940 SDValue Vec = Op.getOperand(0);
5941 EVT VecVT = Vec.getValueType();
5942 if (canTreatAsByteVector(VecVT)) {
5943 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
5944 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
5945 unsigned TruncBytes = TruncVT.getStoreSize();
5946 if (BytesPerElement % TruncBytes == 0) {
5947 // Calculate the value of Y' in the above description. We are
5948 // splitting the original elements into Scale equal-sized pieces
5949 // and for truncation purposes want the last (least-significant)
5950 // of these pieces for IndexN. This is easiest to do by calculating
5951 // the start index of the following element and then subtracting 1.
5952 unsigned Scale = BytesPerElement / TruncBytes;
5953 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
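          // (e.g. truncating the i64 at index 1 of a v2i64 to i32 gives
          // Scale = 2 and NewIndex = (1 + 1) * 2 - 1 = 3, the last i32
          // piece of that element in the v4i32 view.)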
5954
5955 // Defer the creation of the bitcast from X to combineExtract,
5956 // which might be able to optimize the extraction.
5957 VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
5958 VecVT.getStoreSize() / TruncBytes);
5959 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
5960 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
5961 }
5962 }
5963 }
5964 }
5965 return SDValue();
5966}
5967
5968SDValue SystemZTargetLowering::combineZERO_EXTEND(
5969 SDNode *N, DAGCombinerInfo &DCI) const {
5970 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
5971 SelectionDAG &DAG = DCI.DAG;
5972 SDValue N0 = N->getOperand(0);
5973 EVT VT = N->getValueType(0);
5974 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
5975 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
5976 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5977 if (TrueOp && FalseOp) {
5978 SDLoc DL(N0);
5979 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
5980 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
5981 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
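      // The true/false constants are zero-extended to VT, so e.g. an i32
      // select of 1/0 feeding a zext to i64 becomes an i64 select of 1/0.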
5982 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
5983 // If N0 has multiple uses, change other uses as well.
5984 if (!N0.hasOneUse()) {
5985 SDValue TruncSelect =
5986 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
5987 DCI.CombineTo(N0.getNode(), TruncSelect);
5988 }
5989 return NewSelect;
5990 }
5991 }
5992 return SDValue();
5993}
5994
5995SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
5996 SDNode *N, DAGCombinerInfo &DCI)