Bug Summary

File: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
Warning: line 782, column 36
The result of the left shift is undefined due to shifting by '18446744073709551615', which is greater or equal to the width of type 'uint64_t'
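In llvm/Support/MathExtras.h, findFirstSet() returns std::numeric_limits<uint64_t>::max() when its argument is zero (the ZB_Max default), so the flagged shift amount is 2^64 - 1. A minimal standalone sketch of the failure mode (hypothetical driver code, not the LLVM source itself):

  #include "llvm/Support/MathExtras.h"
  #include <cstdint>

  // Mirrors the expression flagged at line 782; UB whenever Bits == 0,
  // because findFirstSet(0) == UINT64_MAX and shifting a uint64_t by
  // 64 or more is undefined.
  uint64_t lowerUndefMask(uint64_t Undef, uint64_t Bits) {
    return Undef & ((uint64_t(1) << llvm::findFirstSet(Bits)) - 1);
  }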

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SystemZISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/build-llvm -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/SystemZ -I /build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/SystemZ -I include -I /build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-command-line-argument -Wno-unknown-warning-option -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/build-llvm -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-26-234817-15343-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
15#include "SystemZConstantPoolValue.h"
16#include "SystemZMachineFunctionInfo.h"
17#include "SystemZTargetMachine.h"
18#include "llvm/CodeGen/CallingConvLower.h"
19#include "llvm/CodeGen/MachineInstrBuilder.h"
20#include "llvm/CodeGen/MachineRegisterInfo.h"
21#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
22#include "llvm/IR/IntrinsicInst.h"
23#include "llvm/IR/Intrinsics.h"
24#include "llvm/IR/IntrinsicsS390.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/KnownBits.h"
27#include <cctype>
28
29using namespace llvm;
30
31#define DEBUG_TYPE "systemz-lower"
32
33namespace {
34// Represents information about a comparison.
35struct Comparison {
36 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
37 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
38 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
39
40 // The operands to the comparison.
41 SDValue Op0, Op1;
42
43 // Chain if this is a strict floating-point comparison.
44 SDValue Chain;
45
46 // The opcode that should be used to compare Op0 and Op1.
47 unsigned Opcode;
48
49 // A SystemZICMP value. Only used for integer comparisons.
50 unsigned ICmpType;
51
52 // The mask of CC values that Opcode can produce.
53 unsigned CCValid;
54
55 // The mask of CC values for which the original condition is true.
56 unsigned CCMask;
57};
58} // end anonymous namespace
59
60// Classify VT as either 32 or 64 bit.
61static bool is32Bit(EVT VT) {
62 switch (VT.getSimpleVT().SimpleTy) {
63 case MVT::i32:
64 return true;
65 case MVT::i64:
66 return false;
67 default:
68 llvm_unreachable("Unsupported type");
69 }
70}
71
72// Return a version of MachineOperand that can be safely used before the
73// final use.
74static MachineOperand earlyUseOperand(MachineOperand Op) {
75 if (Op.isReg())
76 Op.setIsKill(false);
77 return Op;
78}
79
80SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
81 const SystemZSubtarget &STI)
82 : TargetLowering(TM), Subtarget(STI) {
83 MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
84
85 auto *Regs = STI.getSpecialRegisters();
86
87 // Set up the register classes.
88 if (Subtarget.hasHighWord())
89 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
90 else
91 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
92 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
93 if (!useSoftFloat()) {
94 if (Subtarget.hasVector()) {
95 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
96 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
97 } else {
98 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
99 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
100 }
101 if (Subtarget.hasVectorEnhancements1())
102 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
103 else
104 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
105
106 if (Subtarget.hasVector()) {
107 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
108 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
109 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
110 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
111 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
112 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
113 }
114 }
115
116 // Compute derived properties from the register classes
117 computeRegisterProperties(Subtarget.getRegisterInfo());
118
119 // Set up special registers.
120 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
121
122 // TODO: It may be better to default to latency-oriented scheduling, however
123 // LLVM's current latency-oriented scheduler can't handle physreg definitions
124 // such as SystemZ has with CC, so set this to the register-pressure
125 // scheduler, because it can.
126 setSchedulingPreference(Sched::RegPressure);
127
128 setBooleanContents(ZeroOrOneBooleanContent);
129 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
130
131 // Instructions are strings of 2-byte aligned 2-byte values.
132 setMinFunctionAlignment(Align(2));
133 // For performance reasons we prefer 16-byte alignment.
134 setPrefFunctionAlignment(Align(16));
135
136 // Handle operations that are handled in a similar way for all types.
137 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
138 I <= MVT::LAST_FP_VALUETYPE;
139 ++I) {
140 MVT VT = MVT::SimpleValueType(I);
141 if (isTypeLegal(VT)) {
142 // Lower SET_CC into an IPM-based sequence.
143 setOperationAction(ISD::SETCC, VT, Custom);
144 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
145 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
146
147 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
148 setOperationAction(ISD::SELECT, VT, Expand);
149
150 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
151 setOperationAction(ISD::SELECT_CC, VT, Custom);
152 setOperationAction(ISD::BR_CC, VT, Custom);
153 }
154 }
155
156 // Expand jump table branches as address arithmetic followed by an
157 // indirect jump.
158 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
159
160 // Expand BRCOND into a BR_CC (see above).
161 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
162
163 // Handle integer types.
164 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
165 I <= MVT::LAST_INTEGER_VALUETYPE;
166 ++I) {
167 MVT VT = MVT::SimpleValueType(I);
168 if (isTypeLegal(VT)) {
169 setOperationAction(ISD::ABS, VT, Legal);
170
171 // Expand individual DIV and REMs into DIVREMs.
172 setOperationAction(ISD::SDIV, VT, Expand);
173 setOperationAction(ISD::UDIV, VT, Expand);
174 setOperationAction(ISD::SREM, VT, Expand);
175 setOperationAction(ISD::UREM, VT, Expand);
176 setOperationAction(ISD::SDIVREM, VT, Custom);
177 setOperationAction(ISD::UDIVREM, VT, Custom);
178
179 // Support addition/subtraction with overflow.
180 setOperationAction(ISD::SADDO, VT, Custom);
181 setOperationAction(ISD::SSUBO, VT, Custom);
182
183 // Support addition/subtraction with carry.
184 setOperationAction(ISD::UADDO, VT, Custom);
185 setOperationAction(ISD::USUBO, VT, Custom);
186
187 // Support carry in as value rather than glue.
188 setOperationAction(ISD::ADDCARRY, VT, Custom);
189 setOperationAction(ISD::SUBCARRY, VT, Custom);
190
191 // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
192 // stores, putting a serialization instruction after the stores.
193 setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
194 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
195
196 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
197 // available, or if the operand is constant.
198 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
199
200 // Use POPCNT on z196 and above.
201 if (Subtarget.hasPopulationCount())
202 setOperationAction(ISD::CTPOP, VT, Custom);
203 else
204 setOperationAction(ISD::CTPOP, VT, Expand);
205
206 // No special instructions for these.
207 setOperationAction(ISD::CTTZ, VT, Expand);
208 setOperationAction(ISD::ROTR, VT, Expand);
209
210 // Use *MUL_LOHI where possible instead of MULH*.
211 setOperationAction(ISD::MULHS, VT, Expand);
212 setOperationAction(ISD::MULHU, VT, Expand);
213 setOperationAction(ISD::SMUL_LOHI, VT, Custom);
214 setOperationAction(ISD::UMUL_LOHI, VT, Custom);
215
216 // Only z196 and above have native support for conversions to unsigned.
217 // On z10, promoting to i64 doesn't generate an inexact condition for
218 // values that are outside the i32 range but in the i64 range, so use
219 // the default expansion.
220 if (!Subtarget.hasFPExtension())
221 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
222
223 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
224 // default to Expand, so need to be modified to Legal where appropriate.
225 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
226 if (Subtarget.hasFPExtension())
227 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);
228
229 // And similarly for STRICT_[SU]INT_TO_FP.
230 setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal);
231 if (Subtarget.hasFPExtension())
232 setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal);
233 }
234 }
235
236 // Type legalization will convert 8- and 16-bit atomic operations into
237 // forms that operate on i32s (but still keeping the original memory VT).
238 // Lower them into full i32 operations.
239 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
240 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
241 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
242 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
243 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
244 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
245 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
246 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
247 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
248 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
249 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
250
251 // Even though i128 is not a legal type, we still need to custom lower
252 // the atomic operations in order to exploit SystemZ instructions.
253 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
254 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
255
256 // We can use the CC result of compare-and-swap to implement
257 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
258 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
259 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
260 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
261
262 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
263
264 // Traps are legal, as we will convert them to "j .+2".
265 setOperationAction(ISD::TRAP, MVT::Other, Legal);
266
267 // z10 has instructions for signed but not unsigned FP conversion.
268 // Handle unsigned 32-bit types as signed 64-bit types.
269 if (!Subtarget.hasFPExtension()) {
270 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
271 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
272 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote);
273 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
274 }
275
276 // We have native support for a 64-bit CTLZ, via FLOGR.
277 setOperationAction(ISD::CTLZ, MVT::i32, Promote);
278 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
279 setOperationAction(ISD::CTLZ, MVT::i64, Legal);
280
281 // On z15 we have native support for a 64-bit CTPOP.
282 if (Subtarget.hasMiscellaneousExtensions3()) {
283 setOperationAction(ISD::CTPOP, MVT::i32, Promote);
284 setOperationAction(ISD::CTPOP, MVT::i64, Legal);
285 }
286
287 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
288 setOperationAction(ISD::OR, MVT::i64, Custom);
289
290 // Expand 128 bit shifts without using a libcall.
291 setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
292 setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
293 setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
294 setLibcallName(RTLIB::SRL_I128, nullptr);
295 setLibcallName(RTLIB::SHL_I128, nullptr);
296 setLibcallName(RTLIB::SRA_I128, nullptr);
297
298 // Handle bitcast from fp128 to i128.
299 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
300
301 // We have native instructions for i8, i16 and i32 extensions, but not i1.
302 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
303 for (MVT VT : MVT::integer_valuetypes()) {
304 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
305 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
306 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
307 }
308
309 // Handle the various types of symbolic address.
310 setOperationAction(ISD::ConstantPool, PtrVT, Custom);
311 setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
312 setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
313 setOperationAction(ISD::BlockAddress, PtrVT, Custom);
314 setOperationAction(ISD::JumpTable, PtrVT, Custom);
315
316 // We need to handle dynamic allocations specially because of the
317 // 160-byte area at the bottom of the stack.
318 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
319 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
320
321 // Use custom expanders so that we can force the function to use
322 // a frame pointer.
323 setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
324 setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
325
326 // Handle prefetches with PFD or PFDRL.
327 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
328
329 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
330 // Assume by default that all vector operations need to be expanded.
331 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
332 if (getOperationAction(Opcode, VT) == Legal)
333 setOperationAction(Opcode, VT, Expand);
334
335 // Likewise all truncating stores and extending loads.
336 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
337 setTruncStoreAction(VT, InnerVT, Expand);
338 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
339 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
340 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
341 }
342
343 if (isTypeLegal(VT)) {
344 // These operations are legal for anything that can be stored in a
345 // vector register, even if there is no native support for the format
346 // as such. In particular, we can do these for v4f32 even though there
347 // are no specific instructions for that format.
348 setOperationAction(ISD::LOAD, VT, Legal);
349 setOperationAction(ISD::STORE, VT, Legal);
350 setOperationAction(ISD::VSELECT, VT, Legal);
351 setOperationAction(ISD::BITCAST, VT, Legal);
352 setOperationAction(ISD::UNDEF, VT, Legal);
353
354 // Likewise, except that we need to replace the nodes with something
355 // more specific.
356 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
357 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
358 }
359 }
360
361 // Handle integer vector types.
362 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
363 if (isTypeLegal(VT)) {
364 // These operations have direct equivalents.
365 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
366 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
367 setOperationAction(ISD::ADD, VT, Legal);
368 setOperationAction(ISD::SUB, VT, Legal);
369 if (VT != MVT::v2i64)
370 setOperationAction(ISD::MUL, VT, Legal);
371 setOperationAction(ISD::ABS, VT, Legal);
372 setOperationAction(ISD::AND, VT, Legal);
373 setOperationAction(ISD::OR, VT, Legal);
374 setOperationAction(ISD::XOR, VT, Legal);
375 if (Subtarget.hasVectorEnhancements1())
376 setOperationAction(ISD::CTPOP, VT, Legal);
377 else
378 setOperationAction(ISD::CTPOP, VT, Custom);
379 setOperationAction(ISD::CTTZ, VT, Legal);
380 setOperationAction(ISD::CTLZ, VT, Legal);
381
382 // Convert a GPR scalar to a vector by inserting it into element 0.
383 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
384
385 // Use a series of unpacks for extensions.
386 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
387 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
388
389 // Detect shifts by a scalar amount and convert them into
390 // V*_BY_SCALAR.
391 setOperationAction(ISD::SHL, VT, Custom);
392 setOperationAction(ISD::SRA, VT, Custom);
393 setOperationAction(ISD::SRL, VT, Custom);
394
395 // At present ROTL isn't matched by DAGCombiner. ROTR should be
396 // converted into ROTL.
397 setOperationAction(ISD::ROTL, VT, Expand);
398 setOperationAction(ISD::ROTR, VT, Expand);
399
400 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
401 // and inverting the result as necessary.
402 setOperationAction(ISD::SETCC, VT, Custom);
403 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
404 if (Subtarget.hasVectorEnhancements1())
405 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
406 }
407 }
408
409 if (Subtarget.hasVector()) {
410 // There should be no need to check for float types other than v2f64
411 // since <2 x f32> isn't a legal type.
412 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
413 setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
414 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
415 setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
416 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
417 setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
418 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
419 setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
420
421 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
422 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
423 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
424 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
425 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
426 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal);
427 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
428 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal);
429 }
430
431 if (Subtarget.hasVectorEnhancements2()) {
432 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
433 setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
434 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
435 setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
436 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
437 setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
438 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
439 setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);
440
441 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
442 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
443 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
444 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
445 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
446 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal);
447 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
448 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal);
449 }
450
451 // Handle floating-point types.
452 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
453 I <= MVT::LAST_FP_VALUETYPE;
454 ++I) {
455 MVT VT = MVT::SimpleValueType(I);
456 if (isTypeLegal(VT)) {
457 // We can use FI for FRINT.
458 setOperationAction(ISD::FRINT, VT, Legal);
459
460 // We can use the extended form of FI for other rounding operations.
461 if (Subtarget.hasFPExtension()) {
462 setOperationAction(ISD::FNEARBYINT, VT, Legal);
463 setOperationAction(ISD::FFLOOR, VT, Legal);
464 setOperationAction(ISD::FCEIL, VT, Legal);
465 setOperationAction(ISD::FTRUNC, VT, Legal);
466 setOperationAction(ISD::FROUND, VT, Legal);
467 }
468
469 // No special instructions for these.
470 setOperationAction(ISD::FSIN, VT, Expand);
471 setOperationAction(ISD::FCOS, VT, Expand);
472 setOperationAction(ISD::FSINCOS, VT, Expand);
473 setOperationAction(ISD::FREM, VT, Expand);
474 setOperationAction(ISD::FPOW, VT, Expand);
475
476 // Handle constrained floating-point operations.
477 setOperationAction(ISD::STRICT_FADD, VT, Legal);
478 setOperationAction(ISD::STRICT_FSUB, VT, Legal);
479 setOperationAction(ISD::STRICT_FMUL, VT, Legal);
480 setOperationAction(ISD::STRICT_FDIV, VT, Legal);
481 setOperationAction(ISD::STRICT_FMA, VT, Legal);
482 setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
483 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
484 setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
485 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
486 if (Subtarget.hasFPExtension()) {
487 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
488 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
489 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
490 setOperationAction(ISD::STRICT_FROUND, VT, Legal);
491 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
492 }
493 }
494 }
495
496 // Handle floating-point vector types.
497 if (Subtarget.hasVector()) {
498 // Scalar-to-vector conversion is just a subreg.
499 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
500 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
501
502 // Some insertions and extractions can be done directly but others
503 // need to go via integers.
504 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
505 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
506 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
507 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
508
509 // These operations have direct equivalents.
510 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
511 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
512 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
513 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
514 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
515 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
516 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
517 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
518 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
519 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
520 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
521 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
522 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
523 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
524
525 // Handle constrained floating-point operations.
526 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
527 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
528 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
529 setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
530 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
531 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
532 setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
533 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
534 setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
535 setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
536 setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
537 setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
538 }
539
540 // The vector enhancements facility 1 has instructions for these.
541 if (Subtarget.hasVectorEnhancements1()) {
542 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
543 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
544 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
545 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
546 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
547 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
548 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
549 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
550 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
551 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
552 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
553 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
554 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
555 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
556
557 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
558 setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
559 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
560 setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);
561
562 setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
563 setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
564 setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
565 setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);
566
567 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
568 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
569 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
570 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
571
572 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
573 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
574 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
575 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
576
577 setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
578 setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
579 setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
580 setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
581
582 // Handle constrained floating-point operations.
583 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
584 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
585 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
586 setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
587 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
588 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
589 setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
590 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
591 setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
592 setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
593 setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
594 setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
595 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
596 MVT::v4f32, MVT::v2f64 }) {
597 setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
598 setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
599 setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal);
600 setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal);
601 }
602 }
603
604 // We only have fused f128 multiply-addition on vector registers.
605 if (!Subtarget.hasVectorEnhancements1()) {
606 setOperationAction(ISD::FMA, MVT::f128, Expand);
607 setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
608 }
609
610 // We don't have a copysign instruction on vector registers.
611 if (Subtarget.hasVectorEnhancements1())
612 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
613
614 // Needed so that we don't try to implement f128 constant loads using
615 // a load-and-extend of a f80 constant (in cases where the constant
616 // would fit in an f80).
617 for (MVT VT : MVT::fp_valuetypes())
618 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
619
620 // We don't have extending load instruction on vector registers.
621 if (Subtarget.hasVectorEnhancements1()) {
622 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
623 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
624 }
625
626 // Floating-point truncation and stores need to be done separately.
627 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
628 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
629 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
630
631 // We have 64-bit FPR<->GPR moves, but need special handling for
632 // 32-bit forms.
633 if (!Subtarget.hasVector()) {
634 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
635 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
636 }
637
638 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
639 // structure, but VAEND is a no-op.
640 setOperationAction(ISD::VASTART, MVT::Other, Custom);
641 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
642 setOperationAction(ISD::VAEND, MVT::Other, Expand);
643
644 // Codes for which we want to perform some z-specific combinations.
645 setTargetDAGCombine(ISD::ZERO_EXTEND);
646 setTargetDAGCombine(ISD::SIGN_EXTEND);
647 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
648 setTargetDAGCombine(ISD::LOAD);
649 setTargetDAGCombine(ISD::STORE);
650 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
651 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
652 setTargetDAGCombine(ISD::FP_ROUND);
653 setTargetDAGCombine(ISD::STRICT_FP_ROUND);
654 setTargetDAGCombine(ISD::FP_EXTEND);
655 setTargetDAGCombine(ISD::SINT_TO_FP);
656 setTargetDAGCombine(ISD::UINT_TO_FP);
657 setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
658 setTargetDAGCombine(ISD::BSWAP);
659 setTargetDAGCombine(ISD::SDIV);
660 setTargetDAGCombine(ISD::UDIV);
661 setTargetDAGCombine(ISD::SREM);
662 setTargetDAGCombine(ISD::UREM);
663 setTargetDAGCombine(ISD::INTRINSIC_VOID);
664 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
665
666 // Handle intrinsics.
667 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
668 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
669
670 // We want to use MVC in preference to even a single load/store pair.
671 MaxStoresPerMemcpy = 0;
672 MaxStoresPerMemcpyOptSize = 0;
673
674 // The main memset sequence is a byte store followed by an MVC.
675 // Two STC or MV..I stores win over that, but the kind of fused stores
676 // generated by target-independent code don't when the byte value is
677 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
678 // than "STC;MVC". Handle the choice in target-specific code instead.
679 MaxStoresPerMemset = 0;
680 MaxStoresPerMemsetOptSize = 0;
681
682 // Default to having -disable-strictnode-mutation on
683 IsStrictFPEnabled = true;
684}
685
686bool SystemZTargetLowering::useSoftFloat() const {
687 return Subtarget.hasSoftFloat();
688}
689
690EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
691 LLVMContext &, EVT VT) const {
692 if (!VT.isVector())
693 return MVT::i32;
694 return VT.changeVectorElementTypeToInteger();
695}
696
697bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
698 const MachineFunction &MF, EVT VT) const {
699 VT = VT.getScalarType();
700
701 if (!VT.isSimple())
702 return false;
703
704 switch (VT.getSimpleVT().SimpleTy) {
705 case MVT::f32:
706 case MVT::f64:
707 return true;
708 case MVT::f128:
709 return Subtarget.hasVectorEnhancements1();
710 default:
711 break;
712 }
713
714 return false;
715}
716
717// Return true if the constant can be generated with a vector instruction,
718// such as VGM, VGMB or VREPI.
719bool SystemZVectorConstantInfo::isVectorConstantLegal(
720 const SystemZSubtarget &Subtarget) {
721 const SystemZInstrInfo *TII =
722 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
723 if (!Subtarget.hasVector() ||
[3] Assuming the condition is false
724 (isFP128 && !Subtarget.hasVectorEnhancements1()))
[4] Assuming field 'isFP128' is false
725 return false;
726
727 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
728 // preferred way of creating all-zero and all-one vectors so give it
729 // priority over other methods below.
730 unsigned Mask = 0;
731 unsigned I = 0;
732 for (; I < SystemZ::VectorBytes; ++I) {
[5] Loop condition is true. Entering loop body
733 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
734 if (Byte == 0xff)
[6] Assuming 'Byte' is not equal to 255
[7] Taking false branch
735 Mask |= 1ULL << I;
736 else if (Byte != 0)
[8] Assuming 'Byte' is not equal to 0
[9] Taking true branch
737 break;
738 }
[10] Execution continues on line 739
739 if (I == SystemZ::VectorBytes) {
[10.1] 'I' is not equal to 'VectorBytes'
[11] Taking false branch
740 Opcode = SystemZISD::BYTE_MASK;
741 OpVals.push_back(Mask);
742 VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
743 return true;
744 }
745
746 if (SplatBitSize > 64)
[11.1] Field 'SplatBitSize' is <= 64
[12] Taking false branch
747 return false;
748
749 auto tryValue = [&](uint64_t Value) -> bool {
750 // Try VECTOR REPLICATE IMMEDIATE
751 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
752 if (isInt<16>(SignedValue)) {
753 OpVals.push_back(((unsigned) SignedValue));
754 Opcode = SystemZISD::REPLICATE;
755 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
756 SystemZ::VectorBits / SplatBitSize);
757 return true;
758 }
759 // Try VECTOR GENERATE MASK
760 unsigned Start, End;
761 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
762 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
763 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
764 // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
765 OpVals.push_back(Start - (64 - SplatBitSize));
766 OpVals.push_back(End - (64 - SplatBitSize));
767 Opcode = SystemZISD::ROTATE_MASK;
768 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
769 SystemZ::VectorBits / SplatBitSize);
770 return true;
771 }
772 return false;
773 };
774
775 // First try assuming that any undefined bits above the highest set bit
776 // and below the lowest set bit are 1s. This increases the likelihood of
777 // being able to use a sign-extended element value in VECTOR REPLICATE
778 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
779 uint64_t SplatBitsZ = SplatBits.getZExtValue();
780 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
781 uint64_t Lower =
782 (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
[13] Calling 'findFirstSet<unsigned long>'
[20] Returning from 'findFirstSet<unsigned long>'
[21] The result of the left shift is undefined due to shifting by '18446744073709551615', which is greater or equal to the width of type 'uint64_t'
783 uint64_t Upper =
784 (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
785 if (tryValue(SplatBitsZ | Upper | Lower))
786 return true;
787
788 // Now try assuming that any undefined bits between the first and
789 // last defined set bits are set. This increases the chances of
790 // using a non-wraparound mask.
791 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
792 return tryValue(SplatBitsZ | Middle);
793}
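The two masks at lines 781-784 can be formed without an unbounded shift. A possible guard (a sketch only, not necessarily the upstream fix; it assumes SplatBits and SplatUndef never overlap, so dropping the highest set bit from the Upper mask changes nothing):

  // countTrailingZeros/countLeadingZeros return 64 for a zero input, and
  // maskTrailingOnes/maskLeadingOnes accept any count in 0..64, so no
  // shift by >= 64 can occur even when SplatBitsZ == 0.
  uint64_t Lower =
      SplatUndefZ & maskTrailingOnes<uint64_t>(countTrailingZeros(SplatBitsZ));
  uint64_t Upper =
      SplatUndefZ & maskLeadingOnes<uint64_t>(countLeadingZeros(SplatBitsZ));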
794
795SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
796 IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
797 isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
798 SplatBits = FPImm.bitcastToAPInt();
799 unsigned Width = SplatBits.getBitWidth();
800 IntBits <<= (SystemZ::VectorBits - Width);
801
802 // Find the smallest splat.
803 while (Width > 8) {
804 unsigned HalfSize = Width / 2;
805 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
806 APInt LowValue = SplatBits.trunc(HalfSize);
807
808 // If the two halves do not match, stop here.
809 if (HighValue != LowValue || 8 > HalfSize)
810 break;
811
812 SplatBits = HighValue;
813 Width = HalfSize;
814 }
815 SplatUndef = 0;
816 SplatBitSize = Width;
817}
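The halving loop above is easy to sanity-check in isolation. A standalone sketch, specialized to a plain uint64_t instead of APInt (hypothetical helper, assuming Width is a power of two):

  #include <cstdint>

  // Repeatedly split the value in half while both halves agree, mirroring
  // the "find the smallest splat" loop in the constructor above.
  static unsigned smallestSplatWidth(uint64_t Bits, unsigned Width = 64) {
    while (Width > 8) {
      unsigned HalfSize = Width / 2;
      uint64_t High = Bits >> HalfSize;
      uint64_t Low = Bits & ((uint64_t(1) << HalfSize) - 1);
      if (High != Low)   // halves differ: no smaller splat exists
        break;
      Bits = Low;
      Width = HalfSize;
    }
    return Width;  // 0x4040404040404040 -> 8, 0x1234123412341234 -> 16
  }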
818
819SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
820 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
821 bool HasAnyUndefs;
822
823 // Get IntBits by finding the 128 bit splat.
824 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
825 true);
826
827 // Get SplatBits by finding the 8 bit or greater splat.
828 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
829 true);
830}
831
832bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
833 bool ForCodeSize) const {
834 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
835 if (Imm.isZero() || Imm.isNegZero())
[1] Taking false branch
836 return true;
837
838 return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
[2] Calling 'SystemZVectorConstantInfo::isVectorConstantLegal'
839}
840
841/// Returns true if stack probing through inline assembly is requested.
842bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
843 // If the function specifically requests inline stack probes, emit them.
844 if (MF.getFunction().hasFnAttribute("probe-stack"))
845 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
846 "inline-asm";
847 return false;
848}
849
850bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
851 // We can use CGFI or CLGFI.
852 return isInt<32>(Imm) || isUInt<32>(Imm);
853}
854
855bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
856 // We can use ALGFI or SLGFI.
857 return isUInt<32>(Imm) || isUInt<32>(-Imm);
858}
859
860bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
861 EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {
862 // Unaligned accesses should never be slower than the expanded version.
863 // We check specifically for aligned accesses in the few cases where
864 // they are required.
865 if (Fast)
866 *Fast = true;
867 return true;
868}
869
870// Information about the addressing mode for a memory access.
871struct AddressingMode {
872 // True if a long displacement is supported.
873 bool LongDisplacement;
874
875 // True if use of index register is supported.
876 bool IndexReg;
877
878 AddressingMode(bool LongDispl, bool IdxReg) :
879 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
880};
881
882// Return the desired addressing mode for a Load which has only one use (in
883// the same block) which is a Store.
884static AddressingMode getLoadStoreAddrMode(bool HasVector,
885 Type *Ty) {
886 // With vector support a Load->Store combination may be combined to either
887 // an MVC or vector operations and it seems to work best to allow the
888 // vector addressing mode.
889 if (HasVector)
890 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
891
892 // Otherwise only the MVC case is special.
893 bool MVC = Ty->isIntegerTy(8);
894 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
895}
896
897// Return the addressing mode which seems most desirable given an LLVM
898// Instruction pointer.
899static AddressingMode
900supportedAddressingMode(Instruction *I, bool HasVector) {
901 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
902 switch (II->getIntrinsicID()) {
903 default: break;
904 case Intrinsic::memset:
905 case Intrinsic::memmove:
906 case Intrinsic::memcpy:
907 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
908 }
909 }
910
911 if (isa<LoadInst>(I) && I->hasOneUse()) {
912 auto *SingleUser = cast<Instruction>(*I->user_begin());
913 if (SingleUser->getParent() == I->getParent()) {
914 if (isa<ICmpInst>(SingleUser)) {
915 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
916 if (C->getBitWidth() <= 64 &&
917 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
918 // Comparison of memory with 16 bit signed / unsigned immediate
919 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
920 } else if (isa<StoreInst>(SingleUser))
921 // Load->Store
922 return getLoadStoreAddrMode(HasVector, I->getType());
923 }
924 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
925 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
926 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
927 // Load->Store
928 return getLoadStoreAddrMode(HasVector, LoadI->getType());
929 }
930
931 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
932
933 // * Use LDE instead of LE/LEY for z13 to avoid partial register
934 // dependencies (LDE only supports small offsets).
935 // * Utilize the vector registers to hold floating point
936 // values (vector load / store instructions only support small
937 // offsets).
938
939 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
940 I->getOperand(0)->getType());
941 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
942 bool IsVectorAccess = MemAccessTy->isVectorTy();
943
944 // A store of an extracted vector element will be combined into a VSTE type
945 // instruction.
946 if (!IsVectorAccess && isa<StoreInst>(I)) {
947 Value *DataOp = I->getOperand(0);
948 if (isa<ExtractElementInst>(DataOp))
949 IsVectorAccess = true;
950 }
951
952 // A load which gets inserted into a vector element will be combined into a
953 // VLE type instruction.
954 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
955 User *LoadUser = *I->user_begin();
956 if (isa<InsertElementInst>(LoadUser))
957 IsVectorAccess = true;
958 }
959
960 if (IsFPAccess || IsVectorAccess)
961 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
962 }
963
964 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
965}
966
967bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
968 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
969 // Punt on globals for now, although they can be used in limited
970 // RELATIVE LONG cases.
971 if (AM.BaseGV)
972 return false;
973
974 // Require a 20-bit signed offset.
975 if (!isInt<20>(AM.BaseOffs))
976 return false;
977
978 AddressingMode SupportedAM(true, true);
979 if (I != nullptr)
980 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
981
982 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
983 return false;
984
985 if (!SupportedAM.IndexReg)
986 // No indexing allowed.
987 return AM.Scale == 0;
988 else
989 // Indexing is OK but no scale factor can be applied.
990 return AM.Scale == 0 || AM.Scale == 1;
991}
992
993bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
994 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
995 return false;
996 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedSize();
997 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedSize();
998 return FromBits > ToBits;
999}
1000
1001bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
1002 if (!FromVT.isInteger() || !ToVT.isInteger())
1003 return false;
1004 unsigned FromBits = FromVT.getFixedSizeInBits();
1005 unsigned ToBits = ToVT.getFixedSizeInBits();
1006 return FromBits > ToBits;
1007}
1008
1009//===----------------------------------------------------------------------===//
1010// Inline asm support
1011//===----------------------------------------------------------------------===//
1012
1013TargetLowering::ConstraintType
1014SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
1015 if (Constraint.size() == 1) {
1016 switch (Constraint[0]) {
1017 case 'a': // Address register
1018 case 'd': // Data register (equivalent to 'r')
1019 case 'f': // Floating-point register
1020 case 'h': // High-part register
1021 case 'r': // General-purpose register
1022 case 'v': // Vector register
1023 return C_RegisterClass;
1024
1025 case 'Q': // Memory with base and unsigned 12-bit displacement
1026 case 'R': // Likewise, plus an index
1027 case 'S': // Memory with base and signed 20-bit displacement
1028 case 'T': // Likewise, plus an index
1029 case 'm': // Equivalent to 'T'.
1030 return C_Memory;
1031
1032 case 'I': // Unsigned 8-bit constant
1033 case 'J': // Unsigned 12-bit constant
1034 case 'K': // Signed 16-bit constant
1035 case 'L': // Signed 20-bit displacement (on all targets we support)
1036 case 'M': // 0x7fffffff
1037 return C_Immediate;
1038
1039 default:
1040 break;
1041 }
1042 }
1043 return TargetLowering::getConstraintType(Constraint);
1044}
1045
1046TargetLowering::ConstraintWeight SystemZTargetLowering::
1047getSingleConstraintMatchWeight(AsmOperandInfo &info,
1048 const char *constraint) const {
1049 ConstraintWeight weight = CW_Invalid;
1050 Value *CallOperandVal = info.CallOperandVal;
1051 // If we don't have a value, we can't do a match,
1052 // but allow it at the lowest weight.
1053 if (!CallOperandVal)
1054 return CW_Default;
1055 Type *type = CallOperandVal->getType();
1056 // Look at the constraint type.
1057 switch (*constraint) {
1058 default:
1059 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
1060 break;
1061
1062 case 'a': // Address register
1063 case 'd': // Data register (equivalent to 'r')
1064 case 'h': // High-part register
1065 case 'r': // General-purpose register
1066 if (CallOperandVal->getType()->isIntegerTy())
1067 weight = CW_Register;
1068 break;
1069
1070 case 'f': // Floating-point register
1071 if (type->isFloatingPointTy())
1072 weight = CW_Register;
1073 break;
1074
1075 case 'v': // Vector register
1076 if ((type->isVectorTy() || type->isFloatingPointTy()) &&
1077 Subtarget.hasVector())
1078 weight = CW_Register;
1079 break;
1080
1081 case 'I': // Unsigned 8-bit constant
1082 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1083 if (isUInt<8>(C->getZExtValue()))
1084 weight = CW_Constant;
1085 break;
1086
1087 case 'J': // Unsigned 12-bit constant
1088 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1089 if (isUInt<12>(C->getZExtValue()))
1090 weight = CW_Constant;
1091 break;
1092
1093 case 'K': // Signed 16-bit constant
1094 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1095 if (isInt<16>(C->getSExtValue()))
1096 weight = CW_Constant;
1097 break;
1098
1099 case 'L': // Signed 20-bit displacement (on all targets we support)
1100 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1101 if (isInt<20>(C->getSExtValue()))
1102 weight = CW_Constant;
1103 break;
1104
1105 case 'M': // 0x7fffffff
1106 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1107 if (C->getZExtValue() == 0x7fffffff)
1108 weight = CW_Constant;
1109 break;
1110 }
1111 return weight;
1112}
1113
1114// Parse a "{tNNN}" register constraint for which the register type "t"
1115// has already been verified. MC is the class associated with "t" and
1116// Map maps 0-based register numbers to LLVM register numbers.
1117static std::pair<unsigned, const TargetRegisterClass *>
1118parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
1119 const unsigned *Map, unsigned Size) {
1120 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1121 if (isdigit(Constraint[2])) {
1122 unsigned Index;
1123 bool Failed =
1124 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1125 if (!Failed && Index < Size && Map[Index])
1126 return std::make_pair(Map[Index], RC);
1127 }
1128 return std::make_pair(0U, nullptr);
1129}
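For example (a hypothetical call; getRegForInlineAsmConstraint below invokes it the same way), an i64 "{r5}" constraint resolves through the GR64 map:

  // Hypothetical usage: "{r5}" -> GR64Regs[5] == SystemZ::R5D.
  auto P = parseRegisterNumber("{r5}", &SystemZ::GR64BitRegClass,
                               SystemZMC::GR64Regs, 16);
  // An out-of-range number such as "{r99}" yields {0U, nullptr}.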
1130
1131std::pair<unsigned, const TargetRegisterClass *>
1132SystemZTargetLowering::getRegForInlineAsmConstraint(
1133 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1134 if (Constraint.size() == 1) {
1135 // GCC Constraint Letters
1136 switch (Constraint[0]) {
1137 default: break;
1138 case 'd': // Data register (equivalent to 'r')
1139 case 'r': // General-purpose register
1140 if (VT == MVT::i64)
1141 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1142 else if (VT == MVT::i128)
1143 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1144 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1145
1146 case 'a': // Address register
1147 if (VT == MVT::i64)
1148 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1149 else if (VT == MVT::i128)
1150 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1151 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1152
1153 case 'h': // High-part register (an LLVM extension)
1154 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1155
1156 case 'f': // Floating-point register
1157 if (!useSoftFloat()) {
1158 if (VT == MVT::f64)
1159 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1160 else if (VT == MVT::f128)
1161 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1162 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1163 }
1164 break;
1165 case 'v': // Vector register
1166 if (Subtarget.hasVector()) {
1167 if (VT == MVT::f32)
1168 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1169 if (VT == MVT::f64)
1170 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1171 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1172 }
1173 break;
1174 }
1175 }
1176 if (Constraint.size() > 0 && Constraint[0] == '{') {
1177 // We need to override the default register parsing for GPRs and FPRs
1178 // because the interpretation depends on VT. The internal names of
1179 // the registers are also different from the external names
1180 // (F0D and F0S instead of F0, etc.).
1181 if (Constraint[1] == 'r') {
1182 if (VT == MVT::i32)
1183 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1184 SystemZMC::GR32Regs, 16);
1185 if (VT == MVT::i128)
1186 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1187 SystemZMC::GR128Regs, 16);
1188 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1189 SystemZMC::GR64Regs, 16);
1190 }
1191 if (Constraint[1] == 'f') {
1192 if (useSoftFloat())
1193 return std::make_pair(
1194 0u, static_cast<const TargetRegisterClass *>(nullptr));
1195 if (VT == MVT::f32)
1196 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1197 SystemZMC::FP32Regs, 16);
1198 if (VT == MVT::f128)
1199 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1200 SystemZMC::FP128Regs, 16);
1201 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1202 SystemZMC::FP64Regs, 16);
1203 }
1204 if (Constraint[1] == 'v') {
1205 if (!Subtarget.hasVector())
1206 return std::make_pair(
1207 0u, static_cast<const TargetRegisterClass *>(nullptr));
1208 if (VT == MVT::f32)
1209 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1210 SystemZMC::VR32Regs, 32);
1211 if (VT == MVT::f64)
1212 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1213 SystemZMC::VR64Regs, 32);
1214 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1215 SystemZMC::VR128Regs, 32);
1216 }
1217 }
1218 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1219}
1220
1221// FIXME? Maybe this could be a TableGen attribute on some registers and
1222// this table could be generated automatically from RegInfo.
1223Register SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
1224 const MachineFunction &MF) const {
1225
1226 Register Reg = StringSwitch<Register>(RegName)
1227 .Case("r15", SystemZ::R15D)
1228 .Default(0);
1229 if (Reg)
1230 return Reg;
1231 report_fatal_error("Invalid register name global variable");
1232}
1233
1234void SystemZTargetLowering::
1235LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1236 std::vector<SDValue> &Ops,
1237 SelectionDAG &DAG) const {
1238 // Only support length 1 constraints for now.
1239 if (Constraint.length() == 1) {
1240 switch (Constraint[0]) {
1241 case 'I': // Unsigned 8-bit constant
1242 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1243 if (isUInt<8>(C->getZExtValue()))
1244 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1245 Op.getValueType()));
1246 return;
1247
1248 case 'J': // Unsigned 12-bit constant
1249 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1250 if (isUInt<12>(C->getZExtValue()))
1251 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1252 Op.getValueType()));
1253 return;
1254
1255 case 'K': // Signed 16-bit constant
1256 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1257 if (isInt<16>(C->getSExtValue()))
1258 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1259 Op.getValueType()));
1260 return;
1261
1262 case 'L': // Signed 20-bit displacement (on all targets we support)
1263 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1264 if (isInt<20>(C->getSExtValue()))
1265 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1266 Op.getValueType()));
1267 return;
1268
1269 case 'M': // 0x7fffffff
1270 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1271 if (C->getZExtValue() == 0x7fffffff)
1272 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1273 Op.getValueType()));
1274 return;
1275 }
1276 }
1277 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1278}
1279
1280//===----------------------------------------------------------------------===//
1281// Calling conventions
1282//===----------------------------------------------------------------------===//
1283
1284#include "SystemZGenCallingConv.inc"
1285
1286const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
1287 CallingConv::ID) const {
1288 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1289 SystemZ::R14D, 0 };
1290 return ScratchRegs;
1291}
1292
1293bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
1294 Type *ToType) const {
1295 return isTruncateFree(FromType, ToType);
1296}
1297
1298bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1299 return CI->isTailCall();
1300}
1301
1302// We do not yet support 128-bit single-element vector types. If the user
1303// attempts to use such types as function argument or return type, prefer
1304// to error out instead of emitting code violating the ABI.
1305static void VerifyVectorType(MVT VT, EVT ArgVT) {
1306 if (ArgVT.isVector() && !VT.isVector())
1307 report_fatal_error("Unsupported vector argument or return type");
1308}
1309
1310static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
1311 for (unsigned i = 0; i < Ins.size(); ++i)
1312 VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1313}
1314
1315static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
1316 for (unsigned i = 0; i < Outs.size(); ++i)
1317 VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1318}
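
Editorial note: a hedged reproducer for the rejected case (assumed frontend syntax). A 16-byte vector with a single 128-bit element is scalarized by type legalization, so ArgVT is a vector while VT is not, and the checks above abort compilation rather than silently breaking the ABI:

// Hypothetical reproducer: one 128-bit element in a 16-byte vector.
typedef __int128 v1ti __attribute__((vector_size(16)));

v1ti passthrough(v1ti x) { return x; }  // expected: "Unsupported vector argument or return type"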
1319
1320// Value is a value that has been passed to us in the location described by VA
1321// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1322// any loads onto Chain.
1323static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
1324 CCValAssign &VA, SDValue Chain,
1325 SDValue Value) {
1326 // If the argument has been promoted from a smaller type, insert an
1327 // assertion to capture this.
1328 if (VA.getLocInfo() == CCValAssign::SExt)
1329 Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1330 DAG.getValueType(VA.getValVT()));
1331 else if (VA.getLocInfo() == CCValAssign::ZExt)
1332 Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1333 DAG.getValueType(VA.getValVT()));
1334
1335 if (VA.isExtInLoc())
1336 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1337 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1338 // If this is a short vector argument loaded from the stack,
1339 // extend from i64 to full vector size and then bitcast.
1340 assert(VA.getLocVT() == MVT::i64);
1341 assert(VA.getValVT().isVector());
1342 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1343 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1344 } else
1345 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1346 return Value;
1347}
1348
1349// Value is a value of type VA.getValVT() that we need to copy into
1350// the location described by VA. Return a copy of Value converted to
1351// VA.getLocVT(). The caller is responsible for handling indirect values.
1352static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
1353 CCValAssign &VA, SDValue Value) {
1354 switch (VA.getLocInfo()) {
1355 case CCValAssign::SExt:
1356 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1357 case CCValAssign::ZExt:
1358 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1359 case CCValAssign::AExt:
1360 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1361 case CCValAssign::BCvt:
1362 // If this is a short vector argument to be stored to the stack,
1363 // bitcast to v2i64 and then extract first element.
1364 assert(VA.getLocVT() == MVT::i64);
1365 assert(VA.getValVT().isVector());
1366 Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
1367 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1368 DAG.getConstant(0, DL, MVT::i32));
1369 case CCValAssign::Full:
1370 return Value;
1371 default:
1372 llvm_unreachable("Unhandled getLocInfo()")::llvm::llvm_unreachable_internal("Unhandled getLocInfo()", "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 1372)
;
1373 }
1374}
1375
1376static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
1377 SDLoc DL(In);
1378 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
1379 DAG.getIntPtrConstant(0, DL));
1380 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
1381 DAG.getIntPtrConstant(1, DL));
1382 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1383 MVT::Untyped, Hi, Lo);
1384 return SDValue(Pair, 0);
1385}
1386
1387static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
1388 SDLoc DL(In);
1389 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1390 DL, MVT::i64, In);
1391 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1392 DL, MVT::i64, In);
1393 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1394}
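
Editorial note: to make the pairing concrete, here is a small standalone model (plain C++, not part of the lowering; split/join are invented names) of how the two helpers above divide and rejoin a 128-bit value across the high and low 64-bit subregisters:

#include <cstdint>

struct GR128 { uint64_t Hi, Lo; };   // subreg_h64 and subreg_l64 halves

static GR128 split(unsigned __int128 V) {          // mirrors lowerI128ToGR128
  return { static_cast<uint64_t>(V >> 64), static_cast<uint64_t>(V) };
}

static unsigned __int128 join(const GR128 &P) {    // mirrors lowerGR128ToI128
  return (static_cast<unsigned __int128>(P.Hi) << 64) | P.Lo;
}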
1395
1396bool SystemZTargetLowering::splitValueIntoRegisterParts(
1397 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1398 unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
1399 EVT ValueVT = Val.getValueType();
1400 assert((ValueVT != MVT::i128 ||
1401 ((NumParts == 1 && PartVT == MVT::Untyped) ||
1402 (NumParts == 2 && PartVT == MVT::i64))) &&
1403 "Unknown handling of i128 value.");
1404 if (ValueVT == MVT::i128 && NumParts == 1) {
1405 // Inline assembly operand.
1406 Parts[0] = lowerI128ToGR128(DAG, Val);
1407 return true;
1408 }
1409 return false;
1410}
1411
1412SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
1413 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1414 MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
1415 assert((ValueVT != MVT::i128 ||
1416 ((NumParts == 1 && PartVT == MVT::Untyped) ||
1417 (NumParts == 2 && PartVT == MVT::i64))) &&
1418 "Unknown handling of i128 value.");
1419 if (ValueVT == MVT::i128 && NumParts == 1)
1420 // Inline assembly operand.
1421 return lowerGR128ToI128(DAG, Parts[0]);
1422 return SDValue();
1423}
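
Editorial note: the NumParts == 1 / MVT::Untyped paths in the two hooks above exist for inline-asm operands. A hedged sketch of source that reaches them, assuming GCC-style __int128 asm operands in a register constraint:

// Hypothetical sketch: an __int128 value constrained to a register is carried
// as one untyped even/odd GR64 pair via the split/join helpers above.
unsigned __int128 touch(unsigned __int128 x) {
  asm("" : "+r"(x));   // forces the i128 <-> GR128 conversion
  return x;
}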
1424
1425SDValue SystemZTargetLowering::LowerFormalArguments(
1426 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1427 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1428 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1429 MachineFunction &MF = DAG.getMachineFunction();
1430 MachineFrameInfo &MFI = MF.getFrameInfo();
1431 MachineRegisterInfo &MRI = MF.getRegInfo();
1432 SystemZMachineFunctionInfo *FuncInfo =
1433 MF.getInfo<SystemZMachineFunctionInfo>();
1434 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1435 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1436
1437 // Detect unsupported vector argument types.
1438 if (Subtarget.hasVector())
1439 VerifyVectorTypes(Ins);
1440
1441 // Assign locations to all of the incoming arguments.
1442 SmallVector<CCValAssign, 16> ArgLocs;
1443 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1444 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1445
1446 unsigned NumFixedGPRs = 0;
1447 unsigned NumFixedFPRs = 0;
1448 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1449 SDValue ArgValue;
1450 CCValAssign &VA = ArgLocs[I];
1451 EVT LocVT = VA.getLocVT();
1452 if (VA.isRegLoc()) {
1453 // Arguments passed in registers
1454 const TargetRegisterClass *RC;
1455 switch (LocVT.getSimpleVT().SimpleTy) {
1456 default:
1457 // Integers smaller than i64 should be promoted to i64.
1458 llvm_unreachable("Unexpected argument type")::llvm::llvm_unreachable_internal("Unexpected argument type",
"/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 1458)
;
1459 case MVT::i32:
1460 NumFixedGPRs += 1;
1461 RC = &SystemZ::GR32BitRegClass;
1462 break;
1463 case MVT::i64:
1464 NumFixedGPRs += 1;
1465 RC = &SystemZ::GR64BitRegClass;
1466 break;
1467 case MVT::f32:
1468 NumFixedFPRs += 1;
1469 RC = &SystemZ::FP32BitRegClass;
1470 break;
1471 case MVT::f64:
1472 NumFixedFPRs += 1;
1473 RC = &SystemZ::FP64BitRegClass;
1474 break;
1475 case MVT::v16i8:
1476 case MVT::v8i16:
1477 case MVT::v4i32:
1478 case MVT::v2i64:
1479 case MVT::v4f32:
1480 case MVT::v2f64:
1481 RC = &SystemZ::VR128BitRegClass;
1482 break;
1483 }
1484
1485 Register VReg = MRI.createVirtualRegister(RC);
1486 MRI.addLiveIn(VA.getLocReg(), VReg);
1487 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1488 } else {
1489 assert(VA.isMemLoc() && "Argument not register or memory");
1490
1491 // Create the frame index object for this incoming parameter.
1492 int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
1493 VA.getLocMemOffset(), true);
1494
1495 // Create the SelectionDAG nodes corresponding to a load
1496 // from this parameter. Unpromoted ints and floats are
1497 // passed as right-justified 8-byte values.
1498 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1499 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1500 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1501 DAG.getIntPtrConstant(4, DL));
1502 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1503 MachinePointerInfo::getFixedStack(MF, FI));
1504 }
1505
1506 // Convert the value of the argument register into the value that's
1507 // being passed.
1508 if (VA.getLocInfo() == CCValAssign::Indirect) {
1509 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1510 MachinePointerInfo()));
1511 // If the original argument was split (e.g. i128), we need
1512 // to load all parts of it here (using the same address).
1513 unsigned ArgIndex = Ins[I].OrigArgIndex;
1514 assert(Ins[I].PartOffset == 0);
1515 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1516 CCValAssign &PartVA = ArgLocs[I + 1];
1517 unsigned PartOffset = Ins[I + 1].PartOffset;
1518 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1519 DAG.getIntPtrConstant(PartOffset, DL));
1520 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1521 MachinePointerInfo()));
1522 ++I;
1523 }
1524 } else
1525 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1526 }
1527
1528 if (IsVarArg) {
1529 // Save the number of non-varargs registers for later use by va_start, etc.
1530 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1531 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1532
1533 // Likewise the address (in the form of a frame index) of where the
1534 // first stack vararg would be. The 1-byte size here is arbitrary.
1535 int64_t StackSize = CCInfo.getNextStackOffset();
1536 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
1537
1538 // ...and a similar frame index for the caller-allocated save area
1539 // that will be used to store the incoming registers.
1540 int64_t RegSaveOffset =
1541 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1542 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1543 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1544
1545 // Store the FPR varargs in the reserved frame slots. (We store the
1546 // GPRs as part of the prologue.)
1547 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1548 SDValue MemOps[SystemZ::ELFNumArgFPRs];
1549 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1550 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1551 int FI =
1552 MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
1553 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1554 unsigned VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
1555 &SystemZ::FP64BitRegClass);
1556 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1557 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1558 MachinePointerInfo::getFixedStack(MF, FI));
1559 }
1560 // Join the stores, which are independent of one another.
1561 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1562 makeArrayRef(&MemOps[NumFixedFPRs],
1563 SystemZ::ELFNumArgFPRs-NumFixedFPRs));
1564 }
1565 }
1566
1567 return Chain;
1568}
1569
1570static bool canUseSiblingCall(const CCState &ArgCCInfo,
1571 SmallVectorImpl<CCValAssign> &ArgLocs,
1572 SmallVectorImpl<ISD::OutputArg> &Outs) {
1573 // Punt if there are any indirect or stack arguments, or if the call
1574 // needs the callee-saved argument register R6, or if the call uses
1575 // the callee-saved register arguments SwiftSelf and SwiftError.
1576 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1577 CCValAssign &VA = ArgLocs[I];
1578 if (VA.getLocInfo() == CCValAssign::Indirect)
1579 return false;
1580 if (!VA.isRegLoc())
1581 return false;
1582 Register Reg = VA.getLocReg();
1583 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1584 return false;
1585 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1586 return false;
1587 }
1588 return true;
1589}
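
Editorial note: a hedged illustration of the r6 restriction, assuming the ELF ABI (first five integer arguments in r2-r6, with r6 also callee-saved): a tail call whose fifth integer argument lands in r6 fails canUseSiblingCall and is emitted as a normal call, while a four-argument version can become a SIBCALL.

long callee4(long, long, long, long);
long callee5(long, long, long, long, long);

long tail4(long a, long b, long c, long d) {
  return callee4(a, b, c, d);        // args in r2-r5: sibling call is fine
}
long tail5(long a, long b, long c, long d, long e) {
  return callee5(a, b, c, d, e);     // fifth arg in r6: sibling call rejected
}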
1590
1591SDValue
1592SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
1593 SmallVectorImpl<SDValue> &InVals) const {
1594 SelectionDAG &DAG = CLI.DAG;
1595 SDLoc &DL = CLI.DL;
1596 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1597 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1598 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1599 SDValue Chain = CLI.Chain;
1600 SDValue Callee = CLI.Callee;
1601 bool &IsTailCall = CLI.IsTailCall;
1602 CallingConv::ID CallConv = CLI.CallConv;
1603 bool IsVarArg = CLI.IsVarArg;
1604 MachineFunction &MF = DAG.getMachineFunction();
1605 EVT PtrVT = getPointerTy(MF.getDataLayout());
1606 LLVMContext &Ctx = *DAG.getContext();
1607
1608 // Detect unsupported vector argument and return types.
1609 if (Subtarget.hasVector()) {
1610 VerifyVectorTypes(Outs);
1611 VerifyVectorTypes(Ins);
1612 }
1613
1614 // Analyze the operands of the call, assigning locations to each operand.
1615 SmallVector<CCValAssign, 16> ArgLocs;
1616 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1617 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1618
1619 // We don't support GuaranteedTailCallOpt, only automatically-detected
1620 // sibling calls.
1621 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1622 IsTailCall = false;
1623
1624 // Get a count of how many bytes are to be pushed on the stack.
1625 unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1626
1627 // Mark the start of the call.
1628 if (!IsTailCall)
1629 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1630
1631 // Copy argument values to their designated locations.
1632 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
1633 SmallVector<SDValue, 8> MemOpChains;
1634 SDValue StackPtr;
1635 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1636 CCValAssign &VA = ArgLocs[I];
1637 SDValue ArgValue = OutVals[I];
1638
1639 if (VA.getLocInfo() == CCValAssign::Indirect) {
1640 // Store the argument in a stack slot and pass its address.
1641 unsigned ArgIndex = Outs[I].OrigArgIndex;
1642 EVT SlotVT;
1643 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1644 // Allocate the full stack space for a promoted (and split) argument.
1645 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1646 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1647 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1648 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1649 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1650 } else {
1651 SlotVT = Outs[I].ArgVT;
1652 }
1653 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1654 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1655 MemOpChains.push_back(
1656 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1657 MachinePointerInfo::getFixedStack(MF, FI)));
1658 // If the original argument was split (e.g. i128), we need
1659 // to store all parts of it here (and pass just one address).
1660 assert(Outs[I].PartOffset == 0);
1661 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1662 SDValue PartValue = OutVals[I + 1];
1663 unsigned PartOffset = Outs[I + 1].PartOffset;
1664 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1665 DAG.getIntPtrConstant(PartOffset, DL));
1666 MemOpChains.push_back(
1667 DAG.getStore(Chain, DL, PartValue, Address,
1668 MachinePointerInfo::getFixedStack(MF, FI)));
1669 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1670 SlotVT.getStoreSize()) && "Not enough space for argument part!");
1671 ++I;
1672 }
1673 ArgValue = SpillSlot;
1674 } else
1675 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1676
1677 if (VA.isRegLoc())
1678 // Queue up the argument copies and emit them at the end.
1679 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1680 else {
1681 assert(VA.isMemLoc() && "Argument not register or memory");
1682
1683 // Work out the address of the stack slot. Unpromoted ints and
1684 // floats are passed as right-justified 8-byte values.
1685 if (!StackPtr.getNode())
1686 StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
1687 unsigned Offset = SystemZMC::ELFCallFrameSize + VA.getLocMemOffset();
1688 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1689 Offset += 4;
1690 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1691 DAG.getIntPtrConstant(Offset, DL));
1692
1693 // Emit the store.
1694 MemOpChains.push_back(
1695 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1696 }
1697 }
1698
1699 // Join the stores, which are independent of one another.
1700 if (!MemOpChains.empty())
1701 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1702
1703 // Accept direct calls by converting symbolic call addresses to the
1704 // associated Target* opcodes. Force %r1 to be used for indirect
1705 // tail calls.
1706 SDValue Glue;
1707 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1708 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1709 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1710 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1711 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1712 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1713 } else if (IsTailCall) {
1714 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1715 Glue = Chain.getValue(1);
1716 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1717 }
1718
1719 // Build a sequence of copy-to-reg nodes, chained and glued together.
1720 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1721 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1722 RegsToPass[I].second, Glue);
1723 Glue = Chain.getValue(1);
1724 }
1725
1726 // The first call operand is the chain and the second is the target address.
1727 SmallVector<SDValue, 8> Ops;
1728 Ops.push_back(Chain);
1729 Ops.push_back(Callee);
1730
1731 // Add argument registers to the end of the list so that they are
1732 // known live into the call.
1733 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1734 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1735 RegsToPass[I].second.getValueType()));
1736
1737 // Add a register mask operand representing the call-preserved registers.
1738 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1739 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1740 assert(Mask && "Missing call preserved mask for calling convention");
1741 Ops.push_back(DAG.getRegisterMask(Mask));
1742
1743 // Glue the call to the argument copies, if any.
1744 if (Glue.getNode())
1745 Ops.push_back(Glue);
1746
1747 // Emit the call.
1748 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1749 if (IsTailCall)
1750 return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
1751 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
1752 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
1753 Glue = Chain.getValue(1);
1754
1755 // Mark the end of the call, which is glued to the call itself.
1756 Chain = DAG.getCALLSEQ_END(Chain,
1757 DAG.getConstant(NumBytes, DL, PtrVT, true),
1758 DAG.getConstant(0, DL, PtrVT, true),
1759 Glue, DL);
1760 Glue = Chain.getValue(1);
1761
1762 // Assign locations to each value returned by this call.
1763 SmallVector<CCValAssign, 16> RetLocs;
1764 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
1765 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
1766
1767 // Copy all of the result registers out of their specified physreg.
1768 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1769 CCValAssign &VA = RetLocs[I];
1770
1771 // Copy the value out, gluing the copy to the end of the call sequence.
1772 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
1773 VA.getLocVT(), Glue);
1774 Chain = RetValue.getValue(1);
1775 Glue = RetValue.getValue(2);
1776
1777 // Convert the value of the return register into the value that's
1778 // being returned.
1779 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
1780 }
1781
1782 return Chain;
1783}
1784
1785bool SystemZTargetLowering::
1786CanLowerReturn(CallingConv::ID CallConv,
1787 MachineFunction &MF, bool isVarArg,
1788 const SmallVectorImpl<ISD::OutputArg> &Outs,
1789 LLVMContext &Context) const {
1790 // Detect unsupported vector return types.
1791 if (Subtarget.hasVector())
1792 VerifyVectorTypes(Outs);
1793
1794 // Special case that we cannot easily detect in RetCC_SystemZ since
1795 // i128 is not a legal type.
1796 for (auto &Out : Outs)
1797 if (Out.ArgVT == MVT::i128)
1798 return false;
1799
1800 SmallVector<CCValAssign, 16> RetLocs;
1801 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1802 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1803}
1804
1805SDValue
1806SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1807 bool IsVarArg,
1808 const SmallVectorImpl<ISD::OutputArg> &Outs,
1809 const SmallVectorImpl<SDValue> &OutVals,
1810 const SDLoc &DL, SelectionDAG &DAG) const {
1811 MachineFunction &MF = DAG.getMachineFunction();
1812
1813 // Detect unsupported vector return types.
1814 if (Subtarget.hasVector())
1815 VerifyVectorTypes(Outs);
1816
1817 // Assign locations to each returned value.
1818 SmallVector<CCValAssign, 16> RetLocs;
1819 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1820 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1821
1822 // Quick exit for void returns
1823 if (RetLocs.empty())
1824 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1825
1826 if (CallConv == CallingConv::GHC)
1827 report_fatal_error("GHC functions return void only");
1828
1829 // Copy the result values into the output registers.
1830 SDValue Glue;
1831 SmallVector<SDValue, 4> RetOps;
1832 RetOps.push_back(Chain);
1833 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1834 CCValAssign &VA = RetLocs[I];
1835 SDValue RetValue = OutVals[I];
1836
1837 // Make the return register live on exit.
1838 assert(VA.isRegLoc() && "Can only return in registers!");
1839
1840 // Promote the value as required.
1841 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1842
1843 // Chain and glue the copies together.
1844 Register Reg = VA.getLocReg();
1845 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1846 Glue = Chain.getValue(1);
1847 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1848 }
1849
1850 // Update chain and glue.
1851 RetOps[0] = Chain;
1852 if (Glue.getNode())
1853 RetOps.push_back(Glue);
1854
1855 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
1856}
1857
1858// Return true if Op is an intrinsic node with chain that returns the CC value
1859// as its only (other) argument. Provide the associated SystemZISD opcode and
1860// the mask of valid CC values if so.
1861static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1862 unsigned &CCValid) {
1863 unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1864 switch (Id) {
1865 case Intrinsic::s390_tbegin:
1866 Opcode = SystemZISD::TBEGIN;
1867 CCValid = SystemZ::CCMASK_TBEGIN;
1868 return true;
1869
1870 case Intrinsic::s390_tbegin_nofloat:
1871 Opcode = SystemZISD::TBEGIN_NOFLOAT;
1872 CCValid = SystemZ::CCMASK_TBEGIN;
1873 return true;
1874
1875 case Intrinsic::s390_tend:
1876 Opcode = SystemZISD::TEND;
1877 CCValid = SystemZ::CCMASK_TEND;
1878 return true;
1879
1880 default:
1881 return false;
1882 }
1883}
1884
1885// Return true if Op is an intrinsic node without chain that returns the
1886// CC value as its final argument. Provide the associated SystemZISD
1887// opcode and the mask of valid CC values if so.
1888static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
1889 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1890 switch (Id) {
1891 case Intrinsic::s390_vpkshs:
1892 case Intrinsic::s390_vpksfs:
1893 case Intrinsic::s390_vpksgs:
1894 Opcode = SystemZISD::PACKS_CC;
1895 CCValid = SystemZ::CCMASK_VCMP;
1896 return true;
1897
1898 case Intrinsic::s390_vpklshs:
1899 case Intrinsic::s390_vpklsfs:
1900 case Intrinsic::s390_vpklsgs:
1901 Opcode = SystemZISD::PACKLS_CC;
1902 CCValid = SystemZ::CCMASK_VCMP;
1903 return true;
1904
1905 case Intrinsic::s390_vceqbs:
1906 case Intrinsic::s390_vceqhs:
1907 case Intrinsic::s390_vceqfs:
1908 case Intrinsic::s390_vceqgs:
1909 Opcode = SystemZISD::VICMPES;
1910 CCValid = SystemZ::CCMASK_VCMP;
1911 return true;
1912
1913 case Intrinsic::s390_vchbs:
1914 case Intrinsic::s390_vchhs:
1915 case Intrinsic::s390_vchfs:
1916 case Intrinsic::s390_vchgs:
1917 Opcode = SystemZISD::VICMPHS;
1918 CCValid = SystemZ::CCMASK_VCMP;
1919 return true;
1920
1921 case Intrinsic::s390_vchlbs:
1922 case Intrinsic::s390_vchlhs:
1923 case Intrinsic::s390_vchlfs:
1924 case Intrinsic::s390_vchlgs:
1925 Opcode = SystemZISD::VICMPHLS;
1926 CCValid = SystemZ::CCMASK_VCMP;
1927 return true;
1928
1929 case Intrinsic::s390_vtm:
1930 Opcode = SystemZISD::VTM;
1931 CCValid = SystemZ::CCMASK_VCMP;
1932 return true;
1933
1934 case Intrinsic::s390_vfaebs:
1935 case Intrinsic::s390_vfaehs:
1936 case Intrinsic::s390_vfaefs:
1937 Opcode = SystemZISD::VFAE_CC;
1938 CCValid = SystemZ::CCMASK_ANY;
1939 return true;
1940
1941 case Intrinsic::s390_vfaezbs:
1942 case Intrinsic::s390_vfaezhs:
1943 case Intrinsic::s390_vfaezfs:
1944 Opcode = SystemZISD::VFAEZ_CC;
1945 CCValid = SystemZ::CCMASK_ANY;
1946 return true;
1947
1948 case Intrinsic::s390_vfeebs:
1949 case Intrinsic::s390_vfeehs:
1950 case Intrinsic::s390_vfeefs:
1951 Opcode = SystemZISD::VFEE_CC;
1952 CCValid = SystemZ::CCMASK_ANY;
1953 return true;
1954
1955 case Intrinsic::s390_vfeezbs:
1956 case Intrinsic::s390_vfeezhs:
1957 case Intrinsic::s390_vfeezfs:
1958 Opcode = SystemZISD::VFEEZ_CC;
1959 CCValid = SystemZ::CCMASK_ANY;
1960 return true;
1961
1962 case Intrinsic::s390_vfenebs:
1963 case Intrinsic::s390_vfenehs:
1964 case Intrinsic::s390_vfenefs:
1965 Opcode = SystemZISD::VFENE_CC;
1966 CCValid = SystemZ::CCMASK_ANY;
1967 return true;
1968
1969 case Intrinsic::s390_vfenezbs:
1970 case Intrinsic::s390_vfenezhs:
1971 case Intrinsic::s390_vfenezfs:
1972 Opcode = SystemZISD::VFENEZ_CC;
1973 CCValid = SystemZ::CCMASK_ANY;
1974 return true;
1975
1976 case Intrinsic::s390_vistrbs:
1977 case Intrinsic::s390_vistrhs:
1978 case Intrinsic::s390_vistrfs:
1979 Opcode = SystemZISD::VISTR_CC;
1980 CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
1981 return true;
1982
1983 case Intrinsic::s390_vstrcbs:
1984 case Intrinsic::s390_vstrchs:
1985 case Intrinsic::s390_vstrcfs:
1986 Opcode = SystemZISD::VSTRC_CC;
1987 CCValid = SystemZ::CCMASK_ANY;
1988 return true;
1989
1990 case Intrinsic::s390_vstrczbs:
1991 case Intrinsic::s390_vstrczhs:
1992 case Intrinsic::s390_vstrczfs:
1993 Opcode = SystemZISD::VSTRCZ_CC;
1994 CCValid = SystemZ::CCMASK_ANY;
1995 return true;
1996
1997 case Intrinsic::s390_vstrsb:
1998 case Intrinsic::s390_vstrsh:
1999 case Intrinsic::s390_vstrsf:
2000 Opcode = SystemZISD::VSTRS_CC;
2001 CCValid = SystemZ::CCMASK_ANY;
2002 return true;
2003
2004 case Intrinsic::s390_vstrszb:
2005 case Intrinsic::s390_vstrszh:
2006 case Intrinsic::s390_vstrszf:
2007 Opcode = SystemZISD::VSTRSZ_CC;
2008 CCValid = SystemZ::CCMASK_ANY;
2009 return true;
2010
2011 case Intrinsic::s390_vfcedbs:
2012 case Intrinsic::s390_vfcesbs:
2013 Opcode = SystemZISD::VFCMPES;
2014 CCValid = SystemZ::CCMASK_VCMP;
2015 return true;
2016
2017 case Intrinsic::s390_vfchdbs:
2018 case Intrinsic::s390_vfchsbs:
2019 Opcode = SystemZISD::VFCMPHS;
2020 CCValid = SystemZ::CCMASK_VCMP;
2021 return true;
2022
2023 case Intrinsic::s390_vfchedbs:
2024 case Intrinsic::s390_vfchesbs:
2025 Opcode = SystemZISD::VFCMPHES;
2026 CCValid = SystemZ::CCMASK_VCMP;
2027 return true;
2028
2029 case Intrinsic::s390_vftcidb:
2030 case Intrinsic::s390_vftcisb:
2031 Opcode = SystemZISD::VFTCI;
2032 CCValid = SystemZ::CCMASK_VCMP;
2033 return true;
2034
2035 case Intrinsic::s390_tdc:
2036 Opcode = SystemZISD::TDC;
2037 CCValid = SystemZ::CCMASK_TDC;
2038 return true;
2039
2040 default:
2041 return false;
2042 }
2043}
2044
2045// Emit an intrinsic with chain and an explicit CC register result.
2046static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2047 unsigned Opcode) {
2048 // Copy all operands except the intrinsic ID.
2049 unsigned NumOps = Op.getNumOperands();
2050 SmallVector<SDValue, 6> Ops;
2051 Ops.reserve(NumOps - 1);
2052 Ops.push_back(Op.getOperand(0));
2053 for (unsigned I = 2; I < NumOps; ++I)
2054 Ops.push_back(Op.getOperand(I));
2055
2056 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2057 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2058 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2059 SDValue OldChain = SDValue(Op.getNode(), 1);
2060 SDValue NewChain = SDValue(Intr.getNode(), 1);
2061 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2062 return Intr.getNode();
2063}
2064
2065// Emit an intrinsic with an explicit CC register result.
2066static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2067 unsigned Opcode) {
2068 // Copy all operands except the intrinsic ID.
2069 unsigned NumOps = Op.getNumOperands();
2070 SmallVector<SDValue, 6> Ops;
2071 Ops.reserve(NumOps - 1);
2072 for (unsigned I = 1; I < NumOps; ++I)
2073 Ops.push_back(Op.getOperand(I));
2074
2075 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2076 return Intr.getNode();
2077}
2078
2079// CC is a comparison that will be implemented using an integer or
2080// floating-point comparison. Return the condition code mask for
2081// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2082// unsigned comparisons and clear for signed ones. In the floating-point
2083// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2084static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2085#define CONV(X) \
2086 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2087 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2088 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2089
2090 switch (CC) {
2091 default:
2092 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 2092)
;
2093
2094 CONV(EQ);
2095 CONV(NE);
2096 CONV(GT);
2097 CONV(GE);
2098 CONV(LT);
2099 CONV(LE);
2100
2101 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2102 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2103 }
2104#undef CONV
2105}
2106
2107// If C can be converted to a comparison against zero, adjust the operands
2108// as necessary.
2109static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2110 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2111 return;
2112
2113 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2114 if (!ConstOp1)
2115 return;
2116
2117 int64_t Value = ConstOp1->getSExtValue();
2118 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2119 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2120 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2121 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2122 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2123 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2124 }
2125}
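
Editorial note: for instance (a hedged example, not from the source), a signed `x > -1` has Value == -1 with CCMASK_CMP_GT; XOR-ing in CCMASK_CMP_EQ turns that into `x >= 0`, so the compare runs against zero and can later fold into a load-and-test:

int nonnegative(long x) {
  return x > -1;   // rewritten as x >= 0 against a zero constant
}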
2126
2127// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2128// adjust the operands as necessary.
2129static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2130 Comparison &C) {
2131 // For us to make any changes, it must be a comparison between a single-use
2132 // load and a constant.
2133 if (!C.Op0.hasOneUse() ||
2134 C.Op0.getOpcode() != ISD::LOAD ||
2135 C.Op1.getOpcode() != ISD::Constant)
2136 return;
2137
2138 // We must have an 8- or 16-bit load.
2139 auto *Load = cast<LoadSDNode>(C.Op0);
2140 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2141 if ((NumBits != 8 && NumBits != 16) ||
2142 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2143 return;
2144
2145 // The load must be an extending one and the constant must be within the
2146 // range of the unextended value.
2147 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2148 uint64_t Value = ConstOp1->getZExtValue();
2149 uint64_t Mask = (1 << NumBits) - 1;
2150 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2151 // Make sure that ConstOp1 is in range of C.Op0.
2152 int64_t SignedValue = ConstOp1->getSExtValue();
2153 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2154 return;
2155 if (C.ICmpType != SystemZICMP::SignedOnly) {
2156 // Unsigned comparison between two sign-extended values is equivalent
2157 // to unsigned comparison between two zero-extended values.
2158 Value &= Mask;
2159 } else if (NumBits == 8) {
2160 // Try to treat the comparison as unsigned, so that we can use CLI.
2161 // Adjust CCMask and Value as necessary.
2162 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2163 // Test whether the high bit of the byte is set.
2164 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2165 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2166 // Test whether the high bit of the byte is clear.
2167 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2168 else
2169 // No instruction exists for this combination.
2170 return;
2171 C.ICmpType = SystemZICMP::UnsignedOnly;
2172 }
2173 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2174 if (Value > Mask)
2175 return;
2176 // If the constant is in range, we can use any comparison.
2177 C.ICmpType = SystemZICMP::Any;
2178 } else
2179 return;
2180
2181 // Make sure that the first operand is an i32 of the right extension type.
2182 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2183 ISD::SEXTLOAD :
2184 ISD::ZEXTLOAD);
2185 if (C.Op0.getValueType() != MVT::i32 ||
2186 Load->getExtensionType() != ExtType) {
2187 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2188 Load->getBasePtr(), Load->getPointerInfo(),
2189 Load->getMemoryVT(), Load->getAlignment(),
2190 Load->getMemOperand()->getFlags());
2191 // Update the chain uses.
2192 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2193 }
2194
2195 // Make sure that the second operand is an i32 with the right value.
2196 if (C.Op1.getValueType() != MVT::i32 ||
2197 Value != ConstOp1->getZExtValue())
2198 C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2199}
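
Editorial note: a hedged example of the NumBits == 8 special case above. A sign-extending byte load compared `< 0` has Value == 0 with CCMASK_CMP_LT, which is rewritten to an unsigned `> 127` so the single-instruction CLI (compare logical immediate) form applies:

extern signed char flag;

int is_negative(void) {
  return flag < 0;   // becomes an unsigned compare "> 127", a CLI candidate
}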
2200
2201// Return true if Op is either an unextended load, or a load suitable
2202// for integer register-memory comparisons of type ICmpType.
2203static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2204 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2205 if (Load) {
2206 // There are no instructions to compare a register with a memory byte.
2207 if (Load->getMemoryVT() == MVT::i8)
2208 return false;
2209 // Otherwise decide on extension type.
2210 switch (Load->getExtensionType()) {
2211 case ISD::NON_EXTLOAD:
2212 return true;
2213 case ISD::SEXTLOAD:
2214 return ICmpType != SystemZICMP::UnsignedOnly;
2215 case ISD::ZEXTLOAD:
2216 return ICmpType != SystemZICMP::SignedOnly;
2217 default:
2218 break;
2219 }
2220 }
2221 return false;
2222}
2223
2224// Return true if it is better to swap the operands of C.
2225static bool shouldSwapCmpOperands(const Comparison &C) {
2226 // Leave f128 comparisons alone, since they have no memory forms.
2227 if (C.Op0.getValueType() == MVT::f128)
2228 return false;
2229
2230 // Always keep a floating-point constant second, since comparisons with
2231 // zero can use LOAD TEST and comparisons with other constants make a
2232 // natural memory operand.
2233 if (isa<ConstantFPSDNode>(C.Op1))
2234 return false;
2235
2236 // Never swap comparisons with zero since there are many ways to optimize
2237 // those later.
2238 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2239 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2240 return false;
2241
2242 // Also keep natural memory operands second if the loaded value is
2243 // only used here. Several comparisons have memory forms.
2244 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2245 return false;
2246
2247 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2248 // In that case we generally prefer the memory to be second.
2249 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2250 // The only exceptions are when the second operand is a constant and
2251 // we can use things like CHHSI.
2252 if (!ConstOp1)
2253 return true;
2254 // The unsigned memory-immediate instructions can handle 16-bit
2255 // unsigned integers.
2256 if (C.ICmpType != SystemZICMP::SignedOnly &&
2257 isUInt<16>(ConstOp1->getZExtValue()))
2258 return false;
2259 // The signed memory-immediate instructions can handle 16-bit
2260 // signed integers.
2261 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2262 isInt<16>(ConstOp1->getSExtValue()))
2263 return false;
2264 return true;
2265 }
2266
2267 // Try to promote the use of CGFR and CLGFR.
2268 unsigned Opcode0 = C.Op0.getOpcode();
2269 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2270 return true;
2271 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2272 return true;
2273 if (C.ICmpType != SystemZICMP::SignedOnly &&
2274 Opcode0 == ISD::AND &&
2275 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2276 cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
2277 return true;
2278
2279 return false;
2280}
2281
2282// Check whether C tests for equality between X and Y and whether X - Y
2283// or Y - X is also computed. In that case it's better to compare the
2284// result of the subtraction against zero.
2285static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2286 Comparison &C) {
2287 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2288 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2289 for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
2290 SDNode *N = *I;
2291 if (N->getOpcode() == ISD::SUB &&
2292 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2293 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2294 C.Op0 = SDValue(N, 0);
2295 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2296 return;
2297 }
2298 }
2299 }
2300}
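
Editorial note: for example (a hedged sketch), when both the difference and the equality test are present, the compare is redirected at the SUB's result so the subtraction's condition code is reused instead of issuing a separate compare:

long diff_or_zero(long x, long y) {
  long d = x - y;           // SUB sets CC
  return (x == y) ? 0 : d;  // equality test becomes "d == 0", reusing that CC
}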
2301
2302// Check whether C compares a floating-point value with zero and if that
2303// floating-point value is also negated. In this case we can use the
2304// negation to set CC, so avoiding separate LOAD AND TEST and
2305// LOAD (NEGATIVE/COMPLEMENT) instructions.
2306static void adjustForFNeg(Comparison &C) {
2307 // This optimization is invalid for strict comparisons, since FNEG
2308 // does not raise any exceptions.
2309 if (C.Chain)
2310 return;
2311 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2312 if (C1 && C1->isZero()) {
2313 for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
2314 SDNode *N = *I;
2315 if (N->getOpcode() == ISD::FNEG) {
2316 C.Op0 = SDValue(N, 0);
2317 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2318 return;
2319 }
2320 }
2321 }
2322}
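
Editorial note: a hedged illustration of the adjustment above. When a value compared against 0.0 is also negated, the LOAD COMPLEMENT that produces the negation already sets CC, so the compare reverses its mask and reads that CC instead of issuing a LOAD AND TEST (non-strict FP only, per the guard):

double abs_like(double x) {
  double n = -x;             // LCDBR sets CC while negating
  return (x < 0.0) ? n : x;  // mask reversed: tested as "-x > 0.0"
}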
2323
2324// Check whether C compares (shl X, 32) with 0 and whether X is
2325// also sign-extended. In that case it is better to test the result
2326// of the sign extension using LTGFR.
2327//
2328// This case is important because InstCombine transforms a comparison
2329// with (sext (trunc X)) into a comparison with (shl X, 32).
2330static void adjustForLTGFR(Comparison &C) {
2331 // Check for a comparison between (shl X, 32) and 0.
2332 if (C.Op0.getOpcode() == ISD::SHL &&
2333 C.Op0.getValueType() == MVT::i64 &&
2334 C.Op1.getOpcode() == ISD::Constant &&
2335 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2336 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2337 if (C1 && C1->getZExtValue() == 32) {
2338 SDValue ShlOp0 = C.Op0.getOperand(0);
2339 // See whether X has any SIGN_EXTEND_INREG uses.
2340 for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
2341 SDNode *N = *I;
2342 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2343 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2344 C.Op0 = SDValue(N, 0);
2345 return;
2346 }
2347 }
2348 }
2349 }
2350}
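
Editorial note: a hedged example of the pattern (consume is an invented callee). The `t == 0` test below reaches the backend as a comparison of (x << 32) with zero after InstCombine, and since x also has a SIGN_EXTEND_INREG use (producing t), testing the LTGFR result covers both:

long consume(long);

long f(long x) {
  long t = (int)x;   // sign_extend_inreg of x; LTGFR produces t and sets CC
  if (t == 0)        // arrives here as (x << 32) == 0
    return 1;
  return consume(t);
}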
2351
2352// If C compares the truncation of an extending load, try to compare
2353// the untruncated value instead. This exposes more opportunities to
2354// reuse CC.
2355static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2356 Comparison &C) {
2357 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2358 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2359 C.Op1.getOpcode() == ISD::Constant &&
2360 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2361 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2362 if (L->getMemoryVT().getStoreSizeInBits().getFixedSize() <=
2363 C.Op0.getValueSizeInBits().getFixedSize()) {
2364 unsigned Type = L->getExtensionType();
2365 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2366 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2367 C.Op0 = C.Op0.getOperand(0);
2368 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2369 }
2370 }
2371 }
2372}
2373
2374// Return true if shift operation N has an in-range constant shift value.
2375// Store it in ShiftVal if so.
2376static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2377 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2378 if (!Shift)
2379 return false;
2380
2381 uint64_t Amount = Shift->getZExtValue();
2382 if (Amount >= N.getValueSizeInBits())
2383 return false;
2384
2385 ShiftVal = Amount;
2386 return true;
2387}
2388
2389// Check whether an AND with Mask is suitable for a TEST UNDER MASK
2390// instruction and whether the CC value is descriptive enough to handle
2391// a comparison of type Opcode between the AND result and CmpVal.
2392// CCMask says which comparison result is being tested and BitSize is
2393// the number of bits in the operands. If TEST UNDER MASK can be used,
2394// return the corresponding CC mask, otherwise return 0.
2395static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2396 uint64_t Mask, uint64_t CmpVal,
2397 unsigned ICmpType) {
2398 assert(Mask != 0 && "ANDs with zero should have been removed by now");
2399
2400 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2401 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2402 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2403 return 0;
2404
2405 // Work out the masks for the lowest and highest bits.
2406 unsigned HighShift = 63 - countLeadingZeros(Mask);
2407 uint64_t High = uint64_t(1) << HighShift;
2408 uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2409
2410 // Signed ordered comparisons are effectively unsigned if the sign
2411 // bit is dropped.
2412 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2413
2414 // Check for equality comparisons with 0, or the equivalent.
2415 if (CmpVal == 0) {
2416 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2417 return SystemZ::CCMASK_TM_ALL_0;
2418 if (CCMask == SystemZ::CCMASK_CMP_NE)
2419 return SystemZ::CCMASK_TM_SOME_1;
2420 }
2421 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2422 if (CCMask == SystemZ::CCMASK_CMP_LT)
2423 return SystemZ::CCMASK_TM_ALL_0;
2424 if (CCMask == SystemZ::CCMASK_CMP_GE)
2425 return SystemZ::CCMASK_TM_SOME_1;
2426 }
2427 if (EffectivelyUnsigned && CmpVal < Low) {
2428 if (CCMask == SystemZ::CCMASK_CMP_LE)
2429 return SystemZ::CCMASK_TM_ALL_0;
2430 if (CCMask == SystemZ::CCMASK_CMP_GT)
2431 return SystemZ::CCMASK_TM_SOME_1;
2432 }
2433
2434 // Check for equality comparisons with the mask, or the equivalent.
2435 if (CmpVal == Mask) {
2436 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2437 return SystemZ::CCMASK_TM_ALL_1;
2438 if (CCMask == SystemZ::CCMASK_CMP_NE)
2439 return SystemZ::CCMASK_TM_SOME_0;
2440 }
2441 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2442 if (CCMask == SystemZ::CCMASK_CMP_GT)
2443 return SystemZ::CCMASK_TM_ALL_1;
2444 if (CCMask == SystemZ::CCMASK_CMP_LE)
2445 return SystemZ::CCMASK_TM_SOME_0;
2446 }
2447 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2448 if (CCMask == SystemZ::CCMASK_CMP_GE)
2449 return SystemZ::CCMASK_TM_ALL_1;
2450 if (CCMask == SystemZ::CCMASK_CMP_LT)
2451 return SystemZ::CCMASK_TM_SOME_0;
2452 }
2453
2454 // Check for ordered comparisons with the top bit.
2455 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2456 if (CCMask == SystemZ::CCMASK_CMP_LE)
2457 return SystemZ::CCMASK_TM_MSB_0;
2458 if (CCMask == SystemZ::CCMASK_CMP_GT)
2459 return SystemZ::CCMASK_TM_MSB_1;
2460 }
2461 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2462 if (CCMask == SystemZ::CCMASK_CMP_LT)
2463 return SystemZ::CCMASK_TM_MSB_0;
2464 if (CCMask == SystemZ::CCMASK_CMP_GE)
2465 return SystemZ::CCMASK_TM_MSB_1;
2466 }
2467
2468 // If there are just two bits, we can do equality checks for Low and High
2469 // as well.
2470 if (Mask == Low + High) {
2471 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2472 return SystemZ::CCMASK_TM_MIXED_MSB_0;
2473 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2474 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2475 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2476 return SystemZ::CCMASK_TM_MIXED_MSB_1;
2477 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2478 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2479 }
2480
2481 // Looks like we've exhausted our options.
2482 return 0;
2483}
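
Editorial note: a hedged end-to-end example. For `(x & 0x8000) != 0`, Mask == 0x8000 satisfies isImmLL, and CmpVal == 0 with CCMASK_CMP_NE maps to CCMASK_TM_SOME_1 above, so the AND-plus-compare can become a single TEST UNDER MASK (e.g. TMLL):

extern void taken(void);

void branch_on_bit(unsigned long x) {
  if (x & 0x8000)   // TM candidate: single-halfword mask, compare-NE-with-zero
    taken();
}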
2484
2485// See whether C can be implemented as a TEST UNDER MASK instruction.
2486// Update the arguments with the TM version if so.
2487static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2488 Comparison &C) {
2489 // Check that we have a comparison with a constant.
2490 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2491 if (!ConstOp1)
2492 return;
2493 uint64_t CmpVal = ConstOp1->getZExtValue();
2494
2495 // Check whether the nonconstant input is an AND with a constant mask.
2496 Comparison NewC(C);
2497 uint64_t MaskVal;
2498 ConstantSDNode *Mask = nullptr;
2499 if (C.Op0.getOpcode() == ISD::AND) {
2500 NewC.Op0 = C.Op0.getOperand(0);
2501 NewC.Op1 = C.Op0.getOperand(1);
2502 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2503 if (!Mask)
2504 return;
2505 MaskVal = Mask->getZExtValue();
2506 } else {
2507 // There is no instruction to compare with a 64-bit immediate
2508 // so use TMHH instead if possible. We need an unsigned ordered
2509 // comparison with an i64 immediate.
2510 if (NewC.Op0.getValueType() != MVT::i64 ||
2511 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2512 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2513 NewC.ICmpType == SystemZICMP::SignedOnly)
2514 return;
2515 // Convert LE and GT comparisons into LT and GE.
2516 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2517 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2518 if (CmpVal == uint64_t(-1))
2519 return;
2520 CmpVal += 1;
2521 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2522 }
2523 // If the low N bits of Op1 are zero, then the low N bits of Op0 can
2524 // be masked off without changing the result.
2525 MaskVal = -(CmpVal & -CmpVal);
2526 NewC.ICmpType = SystemZICMP::UnsignedOnly;
2527 }
2528 if (!MaskVal)
2529 return;
2530
2531 // Check whether the combination of mask, comparison value and comparison
2532 // type are suitable.
2533 unsigned BitSize = NewC.Op0.getValueSizeInBits();
2534 unsigned NewCCMask, ShiftVal;
2535 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2536 NewC.Op0.getOpcode() == ISD::SHL &&
2537 isSimpleShift(NewC.Op0, ShiftVal) &&
2538 (MaskVal >> ShiftVal != 0) &&
2539 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2540 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2541 MaskVal >> ShiftVal,
2542 CmpVal >> ShiftVal,
2543 SystemZICMP::Any))) {
2544 NewC.Op0 = NewC.Op0.getOperand(0);
2545 MaskVal >>= ShiftVal;
2546 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2547 NewC.Op0.getOpcode() == ISD::SRL &&
2548 isSimpleShift(NewC.Op0, ShiftVal) &&
2549 (MaskVal << ShiftVal != 0) &&
2550 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2551 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2552 MaskVal << ShiftVal,
2553 CmpVal << ShiftVal,
2554 SystemZICMP::UnsignedOnly))) {
2555 NewC.Op0 = NewC.Op0.getOperand(0);
2556 MaskVal <<= ShiftVal;
2557 } else {
2558 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2559 NewC.ICmpType);
2560 if (!NewCCMask)
2561 return;
2562 }
2563
2564 // Go ahead and make the change.
2565 C.Opcode = SystemZISD::TM;
2566 C.Op0 = NewC.Op0;
2567 if (Mask && Mask->getZExtValue() == MaskVal)
2568 C.Op1 = SDValue(Mask, 0);
2569 else
2570 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2571 C.CCValid = SystemZ::CCMASK_TM;
2572 C.CCMask = NewCCMask;
2573}
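// Illustrative trace (hypothetical input): comparing ((x << 2) & 0xc) == 4
// takes the ISD::SHL branch above, stripping the shift so that MaskVal
// becomes 0x3 and CmpVal becomes 0x1; getTestUnderMaskCond then maps the
// equivalent test (x & 0x3) == 0x1 to CCMASK_TM_MIXED_MSB_0.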
2574
2575// See whether the comparison argument contains a redundant AND
2576// and remove it if so. This sometimes happens due to the generic
2577// BRCOND expansion.
2578static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2579 Comparison &C) {
2580 if (C.Op0.getOpcode() != ISD::AND)
2581 return;
2582 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2583 if (!Mask)
2584 return;
2585 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
2586 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2587 return;
2588
2589 C.Op0 = C.Op0.getOperand(0);
2590}
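// Redundancy example (illustrative): if Op0 is (zext i8 %x) & 0xff, known
// bits prove that bits 8..63 of the zext are zero, so every possibly-set
// bit (~Known.Zero) lies inside the mask and the AND is dropped.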
2591
2592// Return a Comparison that tests the condition-code result of intrinsic
2593// node Call against constant integer CC using comparison code Cond.
2594// Opcode is the opcode of the SystemZISD operation for the intrinsic
2595// and CCValid is the set of possible condition-code results.
2596static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2597 SDValue Call, unsigned CCValid, uint64_t CC,
2598 ISD::CondCode Cond) {
2599 Comparison C(Call, SDValue(), SDValue());
2600 C.Opcode = Opcode;
2601 C.CCValid = CCValid;
2602 if (Cond == ISD::SETEQ)
2603 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2604 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2605 else if (Cond == ISD::SETNE)
2606 // ...and the inverse of that.
2607 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2608 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2609 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2610 // always true for CC>3.
2611 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2612 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2613 // ...and the inverse of that.
2614 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2615 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2616 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2617 // always true for CC>3.
2618 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2619 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2620 // ...and the inverse of that.
2621 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2622 else
2623 llvm_unreachable("Unexpected integer comparison type")::llvm::llvm_unreachable_internal("Unexpected integer comparison type"
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 2623)
;
2624 C.CCMask &= CCValid;
2625 return C;
2626}
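// Worked example of the mask arithmetic (illustrative): for Cond == SETLT
// with CC == 2, ~0U << (4 - CC) sets bits 2 and 3 of the mask (higher bits
// are stripped by the final &= CCValid); bit 3 selects CC == 0 and bit 2
// selects CC == 1, exactly the condition codes below 2.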
2627
2628 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2629static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2630 ISD::CondCode Cond, const SDLoc &DL,
2631 SDValue Chain = SDValue(),
2632 bool IsSignaling = false) {
2633 if (CmpOp1.getOpcode() == ISD::Constant) {
2634 assert(!Chain);
2635 uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2636 unsigned Opcode, CCValid;
2637 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2638 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2639 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2640 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2641 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2642 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2643 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2644 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2645 }
2646 Comparison C(CmpOp0, CmpOp1, Chain);
2647 C.CCMask = CCMaskForCondCode(Cond);
2648 if (C.Op0.getValueType().isFloatingPoint()) {
2649 C.CCValid = SystemZ::CCMASK_FCMP;
2650 if (!C.Chain)
2651 C.Opcode = SystemZISD::FCMP;
2652 else if (!IsSignaling)
2653 C.Opcode = SystemZISD::STRICT_FCMP;
2654 else
2655 C.Opcode = SystemZISD::STRICT_FCMPS;
2656 adjustForFNeg(C);
2657 } else {
2658 assert(!C.Chain);
2659 C.CCValid = SystemZ::CCMASK_ICMP;
2660 C.Opcode = SystemZISD::ICMP;
2661 // Choose the type of comparison. Equality and inequality tests can
2662 // use either signed or unsigned comparisons. The choice also doesn't
2663 // matter if both sign bits are known to be clear. In those cases we
2664 // want to give the main isel code the freedom to choose whichever
2665 // form fits best.
2666 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2667 C.CCMask == SystemZ::CCMASK_CMP_NE ||
2668 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2669 C.ICmpType = SystemZICMP::Any;
2670 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2671 C.ICmpType = SystemZICMP::UnsignedOnly;
2672 else
2673 C.ICmpType = SystemZICMP::SignedOnly;
2674 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2675 adjustForRedundantAnd(DAG, DL, C);
2676 adjustZeroCmp(DAG, DL, C);
2677 adjustSubwordCmp(DAG, DL, C);
2678 adjustForSubtraction(DAG, DL, C);
2679 adjustForLTGFR(C);
2680 adjustICmpTruncate(DAG, DL, C);
2681 }
2682
2683 if (shouldSwapCmpOperands(C)) {
2684 std::swap(C.Op0, C.Op1);
2685 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2686 }
2687
2688 adjustForTestUnderMask(DAG, DL, C);
2689 return C;
2690}
2691
2692// Emit the comparison instruction described by C.
2693static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2694 if (!C.Op1.getNode()) {
2695 SDNode *Node;
2696 switch (C.Op0.getOpcode()) {
2697 case ISD::INTRINSIC_W_CHAIN:
2698 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2699 return SDValue(Node, 0);
2700 case ISD::INTRINSIC_WO_CHAIN:
2701 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2702 return SDValue(Node, Node->getNumValues() - 1);
2703 default:
2704 llvm_unreachable("Invalid comparison operands")::llvm::llvm_unreachable_internal("Invalid comparison operands"
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 2704)
;
2705 }
2706 }
2707 if (C.Opcode == SystemZISD::ICMP)
2708 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
2709 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
2710 if (C.Opcode == SystemZISD::TM) {
2711 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2712 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2713 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
2714 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
2715 }
2716 if (C.Chain) {
2717 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
2718 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
2719 }
2720 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
2721}
2722
2723// Implement a 32-bit *MUL_LOHI operation by extending both operands to
2724// 64 bits. Extend is the extension type to use. Store the high part
2725// in Hi and the low part in Lo.
2726static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2727 SDValue Op0, SDValue Op1, SDValue &Hi,
2728 SDValue &Lo) {
2729 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2730 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2731 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2732 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2733 DAG.getConstant(32, DL, MVT::i64));
2734 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2735 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2736}
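// Worked example (illustrative operands): with Extend = ISD::SIGN_EXTEND,
// Op0 = -2 and Op1 = 3, the i64 product is 0xfffffffffffffffa, giving
// Hi = 0xffffffff and Lo = 0xfffffffa, i.e. the signed 64-bit product -6
// split into its high and low 32-bit halves.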
2737
2738// Lower a binary operation that produces two VT results, one in each
2739// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2740// and Opcode performs the GR128 operation. Store the even register result
2741// in Even and the odd register result in Odd.
2742static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2743 unsigned Opcode, SDValue Op0, SDValue Op1,
2744 SDValue &Even, SDValue &Odd) {
2745 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
2746 bool Is32Bit = is32Bit(VT);
2747 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2748 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2749}
2750
2751// Return an i32 value that is 1 if the CC value produced by CCReg is
2752// in the mask CCMask and 0 otherwise. CC is known to have a value
2753// in CCValid, so other values can be ignored.
2754static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
2755 unsigned CCValid, unsigned CCMask) {
2756 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
2757 DAG.getConstant(0, DL, MVT::i32),
2758 DAG.getTargetConstant(CCValid, DL, MVT::i32),
2759 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
2760 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
2761}
2762
2763 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2764// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
2765// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
2766// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
2767// floating-point comparisons.
2768enum class CmpMode { Int, FP, StrictFP, SignalingFP };
2769static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
2770 switch (CC) {
2771 case ISD::SETOEQ:
2772 case ISD::SETEQ:
2773 switch (Mode) {
2774 case CmpMode::Int: return SystemZISD::VICMPE;
2775 case CmpMode::FP: return SystemZISD::VFCMPE;
2776 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
2777 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
2778 }
2779 llvm_unreachable("Bad mode")::llvm::llvm_unreachable_internal("Bad mode", "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 2779)
;
2780
2781 case ISD::SETOGE:
2782 case ISD::SETGE:
2783 switch (Mode) {
2784 case CmpMode::Int: return 0;
2785 case CmpMode::FP: return SystemZISD::VFCMPHE;
2786 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
2787 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
2788 }
2789 llvm_unreachable("Bad mode")::llvm::llvm_unreachable_internal("Bad mode", "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 2789)
;
2790
2791 case ISD::SETOGT:
2792 case ISD::SETGT:
2793 switch (Mode) {
2794 case CmpMode::Int: return SystemZISD::VICMPH;
2795 case CmpMode::FP: return SystemZISD::VFCMPH;
2796 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
2797 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
2798 }
2799 llvm_unreachable("Bad mode")::llvm::llvm_unreachable_internal("Bad mode", "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 2799)
;
2800
2801 case ISD::SETUGT:
2802 switch (Mode) {
2803 case CmpMode::Int: return SystemZISD::VICMPHL;
2804 case CmpMode::FP: return 0;
2805 case CmpMode::StrictFP: return 0;
2806 case CmpMode::SignalingFP: return 0;
2807 }
2808 llvm_unreachable("Bad mode")::llvm::llvm_unreachable_internal("Bad mode", "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 2808)
;
2809
2810 default:
2811 return 0;
2812 }
2813}
2814
2815// Return the SystemZISD vector comparison operation for CC or its inverse,
2816// or 0 if neither can be done directly. Indicate in Invert whether the
2817// result is for the inverse of CC. Mode is as above.
2818static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
2819 bool &Invert) {
2820 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2821 Invert = false;
2822 return Opcode;
2823 }
2824
2825 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
2826 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2827 Invert = true;
2828 return Opcode;
2829 }
2830
2831 return 0;
2832}
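// Example (illustrative): integer SETNE has no direct VICMP opcode, so it
// is inverted to SETEQ, VICMPE is returned, and Invert is set so the caller
// XORs the comparison result with all-ones.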
2833
2834// Return a v2f64 that contains the extended form of elements Start and Start+1
2835// of v4f32 value Op. If Chain is nonnull, return the strict form.
2836static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2837 SDValue Op, SDValue Chain) {
2838 int Mask[] = { Start, -1, Start + 1, -1 };
2839 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2840 if (Chain) {
2841 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
2842 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
2843 }
2844 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2845}
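// Shuffle sketch (illustrative): Start = 2 gives Mask = {2, -1, 3, -1},
// moving elements 2 and 3 of Op into the even lanes; VEXTEND then widens
// the even-indexed f32 lanes into the two f64 elements of the result.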
2846
2847// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2848// producing a result of type VT. If Chain is nonnull, return the strict form.
2849SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2850 const SDLoc &DL, EVT VT,
2851 SDValue CmpOp0,
2852 SDValue CmpOp1,
2853 SDValue Chain) const {
2854 // There is no hardware support for v4f32 (unless we have the vector
2855 // enhancements facility 1), so extend the vector into two v2f64s
2856 // and compare those.
2857 if (CmpOp0.getValueType() == MVT::v4f32 &&
2858 !Subtarget.hasVectorEnhancements1()) {
2859 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
2860 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
2861 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
2862 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
2863 if (Chain) {
2864 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
2865 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
2866 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
2867 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2868 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
2869 H1.getValue(1), L1.getValue(1),
2870 HRes.getValue(1), LRes.getValue(1) };
2871 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
2872 SDValue Ops[2] = { Res, NewChain };
2873 return DAG.getMergeValues(Ops, DL);
2874 }
2875 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
2876 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
2877 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2878 }
2879 if (Chain) {
2880 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
2881 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
2882 }
2883 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
2884}
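// Split example (illustrative): without vector-enhancements-1 a v4f32
// compare is performed as two v2f64 compares, one on elements {0,1} and one
// on elements {2,3}; the two v2i64 results are then PACKed back into the
// requested v4i32 mask.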
2885
2886// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
2887// an integer mask of type VT. If Chain is nonnull, we have a strict
2888// floating-point comparison. If in addition IsSignaling is true, we have
2889// a strict signaling floating-point comparison.
2890SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
2891 const SDLoc &DL, EVT VT,
2892 ISD::CondCode CC,
2893 SDValue CmpOp0,
2894 SDValue CmpOp1,
2895 SDValue Chain,
2896 bool IsSignaling) const {
2897 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
2898 assert (!Chain || IsFP);
2899 assert (!IsSignaling || Chain);
2900 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
2901 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
2902 bool Invert = false;
2903 SDValue Cmp;
2904 switch (CC) {
2905 // Handle tests for order using (or (ogt y x) (oge x y)).
2906 case ISD::SETUO:
2907 Invert = true;
2908 LLVM_FALLTHROUGH;
2909 case ISD::SETO: {
2910 assert(IsFP && "Unexpected integer comparison");
2911 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2912 DL, VT, CmpOp1, CmpOp0, Chain);
2913 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
2914 DL, VT, CmpOp0, CmpOp1, Chain);
2915 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
2916 if (Chain)
2917 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2918 LT.getValue(1), GE.getValue(1));
2919 break;
2920 }
2921
2922 // Handle <> tests using (or (ogt y x) (ogt x y)).
2923 case ISD::SETUEQ:
2924 Invert = true;
2925 LLVM_FALLTHROUGH;
2926 case ISD::SETONE: {
2927 assert(IsFP && "Unexpected integer comparison");
2928 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2929 DL, VT, CmpOp1, CmpOp0, Chain);
2930 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2931 DL, VT, CmpOp0, CmpOp1, Chain);
2932 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
2933 if (Chain)
2934 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2935 LT.getValue(1), GT.getValue(1));
2936 break;
2937 }
2938
2939 // Otherwise a single comparison is enough. It doesn't really
2940 // matter whether we try the inversion or the swap first, since
2941 // there are no cases where both work.
2942 default:
2943 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
2944 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
2945 else {
2946 CC = ISD::getSetCCSwappedOperands(CC);
2947 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
2948 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
2949 else
2950 llvm_unreachable("Unhandled comparison")::llvm::llvm_unreachable_internal("Unhandled comparison", "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 2950)
;
2951 }
2952 if (Chain)
2953 Chain = Cmp.getValue(1);
2954 break;
2955 }
2956 if (Invert) {
2957 SDValue Mask =
2958 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
2959 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
2960 }
2961 if (Chain && Chain.getNode() != Cmp.getNode()) {
2962 SDValue Ops[2] = { Cmp, Chain };
2963 Cmp = DAG.getMergeValues(Ops, DL);
2964 }
2965 return Cmp;
2966}
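// Inversion example (illustrative): SETUO (unordered) is lowered as the
// inverse of SETO, i.e. the OR of VFCMPH(y, x) and VFCMPHE(x, y) computed
// above, followed by an XOR with an all-ones splat to flip every lane of
// the mask.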
2967
2968SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
2969 SelectionDAG &DAG) const {
2970 SDValue CmpOp0 = Op.getOperand(0);
2971 SDValue CmpOp1 = Op.getOperand(1);
2972 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2973 SDLoc DL(Op);
2974 EVT VT = Op.getValueType();
2975 if (VT.isVector())
2976 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
2977
2978 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2979 SDValue CCReg = emitCmp(DAG, DL, C);
2980 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
2981}
2982
2983SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
2984 SelectionDAG &DAG,
2985 bool IsSignaling) const {
2986 SDValue Chain = Op.getOperand(0);
2987 SDValue CmpOp0 = Op.getOperand(1);
2988 SDValue CmpOp1 = Op.getOperand(2);
2989 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
2990 SDLoc DL(Op);
2991 EVT VT = Op.getNode()->getValueType(0);
2992 if (VT.isVector()) {
2993 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
2994 Chain, IsSignaling);
2995 return Res.getValue(Op.getResNo());
2996 }
2997
2998 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
2999 SDValue CCReg = emitCmp(DAG, DL, C);
3000 CCReg->setFlags(Op->getFlags());
3001 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3002 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3003 return DAG.getMergeValues(Ops, DL);
3004}
3005
3006SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3007 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3008 SDValue CmpOp0 = Op.getOperand(2);
3009 SDValue CmpOp1 = Op.getOperand(3);
3010 SDValue Dest = Op.getOperand(4);
3011 SDLoc DL(Op);
3012
3013 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3014 SDValue CCReg = emitCmp(DAG, DL, C);
3015 return DAG.getNode(
3016 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3017 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3018 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3019}
3020
3021// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3022// allowing Pos and Neg to be wider than CmpOp.
3023static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3024 return (Neg.getOpcode() == ISD::SUB &&
3025 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3026 cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
3027 Neg.getOperand(1) == Pos &&
3028 (Pos == CmpOp ||
3029 (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3030 Pos.getOperand(0) == CmpOp)));
3031}
3032
3033// Return the absolute or negative absolute of Op; IsNegative decides which.
3034static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3035 bool IsNegative) {
3036 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3037 if (IsNegative)
3038 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3039 DAG.getConstant(0, DL, Op.getValueType()), Op);
3040 return Op;
3041}
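// Pattern sketch (illustrative): SELECT_CC of the form (x > 0) ? x : (0 - x)
// matches isAbsolute(x, TrueOp, FalseOp) and lowers to ISD::ABS (LPGFR);
// with the sense of the comparison flipped to x < 0, the CCMASK_CMP_LT test
// in lowerSELECT_CC below selects the negated absolute (LNGFR).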
3042
3043SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3044 SelectionDAG &DAG) const {
3045 SDValue CmpOp0 = Op.getOperand(0);
3046 SDValue CmpOp1 = Op.getOperand(1);
3047 SDValue TrueOp = Op.getOperand(2);
3048 SDValue FalseOp = Op.getOperand(3);
3049 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3050 SDLoc DL(Op);
3051
3052 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3053
3054 // Check for absolute and negative-absolute selections, including those
3055 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3056 // This check supplements the one in DAGCombiner.
3057 if (C.Opcode == SystemZISD::ICMP &&
3058 C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3059 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3060 C.Op1.getOpcode() == ISD::Constant &&
3061 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
3062 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3063 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3064 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3065 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3066 }
3067
3068 SDValue CCReg = emitCmp(DAG, DL, C);
3069 SDValue Ops[] = {TrueOp, FalseOp,
3070 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3071 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3072
3073 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3074}
3075
3076SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3077 SelectionDAG &DAG) const {
3078 SDLoc DL(Node);
3079 const GlobalValue *GV = Node->getGlobal();
3080 int64_t Offset = Node->getOffset();
3081 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3082 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3083
3084 SDValue Result;
3085 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3086 if (isInt<32>(Offset)) {
3087 // Assign anchors at 1<<12 byte boundaries.
3088 uint64_t Anchor = Offset & ~uint64_t(0xfff);
3089 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3090 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3091
3092 // The offset can be folded into the address if it is aligned to a
3093 // halfword.
3094 Offset -= Anchor;
3095 if (Offset != 0 && (Offset & 1) == 0) {
3096 SDValue Full =
3097 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3098 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3099 Offset = 0;
3100 }
3101 } else {
3102 // Conservatively load a constant offset that does not fit in 32 bits
3103 // into a register below.
3104 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3105 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3106 }
3107 } else {
3108 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3109 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3110 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3111 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3112 }
3113
3114 // If there was a non-zero offset that we didn't fold, create an explicit
3115 // addition for it.
3116 if (Offset != 0)
3117 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3118 DAG.getConstant(Offset, DL, PtrVT));
3119
3120 return Result;
3121}
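// Anchor arithmetic (illustrative offsets): for Offset = 0x2466 the anchor
// is 0x2000 and the remaining 0x466 is halfword-aligned, so it folds into a
// PCREL_OFFSET node; for Offset = 0x2467 the odd remainder survives and is
// added explicitly by the final ISD::ADD above.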
3122
3123SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3124 SelectionDAG &DAG,
3125 unsigned Opcode,
3126 SDValue GOTOffset) const {
3127 SDLoc DL(Node);
3128 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3129 SDValue Chain = DAG.getEntryNode();
3130 SDValue Glue;
3131
3132 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3133 CallingConv::GHC)
3134 report_fatal_error("In GHC calling convention TLS is not supported");
3135
3136 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3137 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3138 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3139 Glue = Chain.getValue(1);
3140 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3141 Glue = Chain.getValue(1);
3142
3143 // The first call operand is the chain and the second is the TLS symbol.
3144 SmallVector<SDValue, 8> Ops;
3145 Ops.push_back(Chain);
3146 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3147 Node->getValueType(0),
3148 0, 0));
3149
3150 // Add argument registers to the end of the list so that they are
3151 // known live into the call.
3152 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3153 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3154
3155 // Add a register mask operand representing the call-preserved registers.
3156 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3157 const uint32_t *Mask =
3158 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3159 assert(Mask && "Missing call preserved mask for calling convention")(static_cast <bool> (Mask && "Missing call preserved mask for calling convention"
) ? void (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 3159, __extension__ __PRETTY_FUNCTION__))
;
3160 Ops.push_back(DAG.getRegisterMask(Mask));
3161
3162 // Glue the call to the argument copies.
3163 Ops.push_back(Glue);
3164
3165 // Emit the call.
3166 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3167 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3168 Glue = Chain.getValue(1);
3169
3170 // Copy the return value from %r2.
3171 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3172}
3173
3174SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3175 SelectionDAG &DAG) const {
3176 SDValue Chain = DAG.getEntryNode();
3177 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3178
3179 // The high part of the thread pointer is in access register 0.
3180 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3181 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3182
3183 // The low part of the thread pointer is in access register 1.
3184 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3185 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3186
3187 // Merge them into a single 64-bit address.
3188 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3189 DAG.getConstant(32, DL, PtrVT));
3190 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3191}
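// Net effect (sketch): TP = (i64(%a0) << 32) | zext32(%a1), with access
// registers %a0 and %a1 supplying the high and low halves of the 64-bit
// thread pointer.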
3192
3193SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3194 SelectionDAG &DAG) const {
3195 if (DAG.getTarget().useEmulatedTLS())
3196 return LowerToTLSEmulatedModel(Node, DAG);
3197 SDLoc DL(Node);
3198 const GlobalValue *GV = Node->getGlobal();
3199 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3200 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3201
3202 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3203 CallingConv::GHC)
3204 report_fatal_error("In GHC calling convention TLS is not supported");
3205
3206 SDValue TP = lowerThreadPointer(DL, DAG);
3207
3208 // Get the offset of GA from the thread pointer, based on the TLS model.
3209 SDValue Offset;
3210 switch (model) {
3211 case TLSModel::GeneralDynamic: {
3212 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3213 SystemZConstantPoolValue *CPV =
3214 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3215
3216 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3217 Offset = DAG.getLoad(
3218 PtrVT, DL, DAG.getEntryNode(), Offset,
3219 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3220
3221 // Call __tls_get_offset to retrieve the offset.
3222 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3223 break;
3224 }
3225
3226 case TLSModel::LocalDynamic: {
3227 // Load the GOT offset of the module ID.
3228 SystemZConstantPoolValue *CPV =
3229 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3230
3231 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3232 Offset = DAG.getLoad(
3233 PtrVT, DL, DAG.getEntryNode(), Offset,
3234 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3235
3236 // Call __tls_get_offset to retrieve the module base offset.
3237 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3238
3239 // Note: The SystemZLDCleanupPass will remove redundant computations
3240 // of the module base offset. Count total number of local-dynamic
3241 // accesses to trigger execution of that pass.
3242 SystemZMachineFunctionInfo* MFI =
3243 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3244 MFI->incNumLocalDynamicTLSAccesses();
3245
3246 // Add the per-symbol offset.
3247 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3248
3249 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3250 DTPOffset = DAG.getLoad(
3251 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3252 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3253
3254 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3255 break;
3256 }
3257
3258 case TLSModel::InitialExec: {
3259 // Load the offset from the GOT.
3260 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3261 SystemZII::MO_INDNTPOFF);
3262 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3263 Offset =
3264 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3265 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3266 break;
3267 }
3268
3269 case TLSModel::LocalExec: {
3270 // Force the offset into the constant pool and load it from there.
3271 SystemZConstantPoolValue *CPV =
3272 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3273
3274 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3275 Offset = DAG.getLoad(
3276 PtrVT, DL, DAG.getEntryNode(), Offset,
3277 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3278 break;
3279 }
3280 }
3281
3282 // Add the base and offset together.
3283 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3284}
3285
3286SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3287 SelectionDAG &DAG) const {
3288 SDLoc DL(Node);
3289 const BlockAddress *BA = Node->getBlockAddress();
3290 int64_t Offset = Node->getOffset();
3291 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3292
3293 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3294 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3295 return Result;
3296}
3297
3298SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3299 SelectionDAG &DAG) const {
3300 SDLoc DL(JT);
3301 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3302 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3303
3304 // Use LARL to load the address of the table.
3305 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3306}
3307
3308SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3309 SelectionDAG &DAG) const {
3310 SDLoc DL(CP);
3311 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3312
3313 SDValue Result;
3314 if (CP->isMachineConstantPoolEntry())
3315 Result =
3316 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3317 else
3318 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3319 CP->getOffset());
3320
3321 // Use LARL to load the address of the constant pool entry.
3322 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3323}
3324
3325SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3326 SelectionDAG &DAG) const {
3327 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
3328 MachineFunction &MF = DAG.getMachineFunction();
3329 MachineFrameInfo &MFI = MF.getFrameInfo();
3330 MFI.setFrameAddressIsTaken(true);
3331
3332 SDLoc DL(Op);
3333 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3334 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3335
3336 // By definition, the frame address is the address of the back chain. (In
3337 // the case of a packed stack without a backchain, return the address where
3338 // the backchain would have been stored. This will either be unused space
3339 // or will contain a saved register.)
3340 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3341 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3342
3343 // FIXME The frontend should detect this case.
3344 if (Depth > 0) {
3345 report_fatal_error("Unsupported stack frame traversal count");
3346 }
3347
3348 return BackChain;
3349}
3350
3351SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3352 SelectionDAG &DAG) const {
3353 MachineFunction &MF = DAG.getMachineFunction();
3354 MachineFrameInfo &MFI = MF.getFrameInfo();
3355 MFI.setReturnAddressIsTaken(true);
3356
3357 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3358 return SDValue();
3359
3360 SDLoc DL(Op);
3361 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3362 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3363
3364 // FIXME The frontend should detect this case.
3365 if (Depth > 0) {
3366 report_fatal_error("Unsupported stack frame traversal count");
3367 }
3368
3369 // Return R14D, which has the return address. Mark it an implicit live-in.
3370 unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3371 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3372}
3373
3374SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3375 SelectionDAG &DAG) const {
3376 SDLoc DL(Op);
3377 SDValue In = Op.getOperand(0);
3378 EVT InVT = In.getValueType();
3379 EVT ResVT = Op.getValueType();
3380
3381 // Convert loads directly. This is normally done by DAGCombiner,
3382 // but we need this case for bitcasts that are created during lowering
3383 // and which are then lowered themselves.
3384 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3385 if (ISD::isNormalLoad(LoadN)) {
3386 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3387 LoadN->getBasePtr(), LoadN->getMemOperand());
3388 // Update the chain uses.
3389 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3390 return NewLoad;
3391 }
3392
3393 if (InVT == MVT::i32 && ResVT == MVT::f32) {
3394 SDValue In64;
3395 if (Subtarget.hasHighWord()) {
3396 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3397 MVT::i64);
3398 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3399 MVT::i64, SDValue(U64, 0), In);
3400 } else {
3401 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3402 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3403 DAG.getConstant(32, DL, MVT::i64));
3404 }
3405 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3406 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3407 DL, MVT::f32, Out64);
3408 }
3409 if (InVT == MVT::f32 && ResVT == MVT::i32) {
3410 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3411 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3412 MVT::f64, SDValue(U64, 0), In);
3413 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3414 if (Subtarget.hasHighWord())
3415 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3416 MVT::i32, Out64);
3417 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3418 DAG.getConstant(32, DL, MVT::i64));
3419 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3420 }
3421 llvm_unreachable("Unexpected bitcast combination")::llvm::llvm_unreachable_internal("Unexpected bitcast combination"
, "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 3421)
;
3422}
3423
3424SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3425 SelectionDAG &DAG) const {
3426 MachineFunction &MF = DAG.getMachineFunction();
3427 SystemZMachineFunctionInfo *FuncInfo =
3428 MF.getInfo<SystemZMachineFunctionInfo>();
3429 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3430
3431 SDValue Chain = Op.getOperand(0);
3432 SDValue Addr = Op.getOperand(1);
3433 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3434 SDLoc DL(Op);
3435
3436 // The initial values of each field.
3437 const unsigned NumFields = 4;
3438 SDValue Fields[NumFields] = {
3439 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3440 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3441 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3442 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3443 };
3444
3445 // Store each field into its respective slot.
3446 SDValue MemOps[NumFields];
3447 unsigned Offset = 0;
3448 for (unsigned I = 0; I < NumFields; ++I) {
3449 SDValue FieldAddr = Addr;
3450 if (Offset != 0)
3451 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3452 DAG.getIntPtrConstant(Offset, DL));
3453 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3454 MachinePointerInfo(SV, Offset));
3455 Offset += 8;
3456 }
3457 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3458}
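// The four stores above populate the s390x ELF va_list layout (field names
// per the ABI, shown for illustration):
//   struct __va_list_tag { long __gpr; long __fpr;
//                          void *__overflow_arg_area; void *__reg_save_area; };
// at byte offsets 0, 8, 16 and 24 respectively.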
3459
3460SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3461 SelectionDAG &DAG) const {
3462 SDValue Chain = Op.getOperand(0);
3463 SDValue DstPtr = Op.getOperand(1);
3464 SDValue SrcPtr = Op.getOperand(2);
3465 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3466 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3467 SDLoc DL(Op);
3468
3469 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
3470 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3471 /*isTailCall*/ false, MachinePointerInfo(DstSV),
3472 MachinePointerInfo(SrcSV));
3473}
3474
3475SDValue SystemZTargetLowering::
3476lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
3477 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3478 MachineFunction &MF = DAG.getMachineFunction();
3479 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3480 bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3481
3482 SDValue Chain = Op.getOperand(0);
3483 SDValue Size = Op.getOperand(1);
3484 SDValue Align = Op.getOperand(2);
3485 SDLoc DL(Op);
3486
3487 // If the user has set the "no-realign-stack" function attribute, ignore
3488 // alloca alignments.
3489 uint64_t AlignVal =
3490 (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3491
3492 uint64_t StackAlign = TFI->getStackAlignment();
3493 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3494 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3495
3496 Register SPReg = getStackPointerRegisterToSaveRestore();
3497 SDValue NeededSpace = Size;
3498
3499 // Get a reference to the stack pointer.
3500 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3501
3502 // If we need a backchain, save it now.
3503 SDValue Backchain;
3504 if (StoreBackchain)
3505 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
3506 MachinePointerInfo());
3507
3508 // Add extra space for alignment if needed.
3509 if (ExtraAlignSpace)
3510 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3511 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3512
3513 // Get the new stack pointer value.
3514 SDValue NewSP;
3515 if (hasInlineStackProbe(MF)) {
3516 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
3517 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
3518 Chain = NewSP.getValue(1);
3519 }
3520 else {
3521 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
3522 // Copy the new stack pointer back.
3523 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3524 }
3525
3526 // The allocated data lives above the 160 bytes allocated for the standard
3527 // frame, plus any outgoing stack arguments. We don't know how much that
3528 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3529 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3530 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3531
3532 // Dynamically realign if needed.
3533 if (RequiredAlign > StackAlign) {
3534 Result =
3535 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3536 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3537 Result =
3538 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3539 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3540 }
3541
3542 if (StoreBackchain)
3543 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
3544 MachinePointerInfo());
3545
3546 SDValue Ops[2] = { Result, Chain };
3547 return DAG.getMergeValues(Ops, DL);
3548}
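// Realignment arithmetic (illustrative): with StackAlign = 8 and a 16-byte
// alloca alignment, ExtraAlignSpace = 8, so 8 slack bytes are allocated and
// the result address is rounded as (Result + 8) & ~15, which is guaranteed
// to land on a 16-byte boundary inside the reserved slack.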
3549
3550SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3551 SDValue Op, SelectionDAG &DAG) const {
3552 SDLoc DL(Op);
3553
3554 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3555}
3556
3557SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
3558 SelectionDAG &DAG) const {
3559 EVT VT = Op.getValueType();
3560 SDLoc DL(Op);
3561 SDValue Ops[2];
3562 if (is32Bit(VT))
3563 // Just do a normal 64-bit multiplication and extract the results.
3564 // We define this so that it can be used for constant division.
3565 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
3566 Op.getOperand(1), Ops[1], Ops[0]);
3567 else if (Subtarget.hasMiscellaneousExtensions2())
3568 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3569 // the high result in the even register. ISD::SMUL_LOHI is defined to
3570 // return the low half first, so the results are in reverse order.
3571 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
3572 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3573 else {
3574 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3575 //
3576 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3577 //
3578 // but using the fact that the upper halves are either all zeros
3579 // or all ones:
3580 //
3581 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3582 //
3583 // and grouping the right terms together since they are quicker than the
3584 // multiplication:
3585 //
3586 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
3587 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
3588 SDValue LL = Op.getOperand(0);
3589 SDValue RL = Op.getOperand(1);
3590 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
3591 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
3592 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3593 // the high result in the even register. ISD::SMUL_LOHI is defined to
3594 // return the low half first, so the results are in reverse order.
3595 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3596 LL, RL, Ops[1], Ops[0]);
3597 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
3598 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
3599 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
3600 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
3601 }
3602 return DAG.getMergeValues(Ops, DL);
3603}
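// Sanity check of the signed fixup (illustrative values): for ll = -1 and
// rl = 2, UMUL_LOHI yields Hi = 1 and Lo = 0xfffffffffffffffe; lh is
// all-ones and rh is zero, so NegSum = (lh & rl) + (ll & rh) = 2 and the
// corrected high half is 1 - 2 = -1, matching the signed product -2.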
3604
3605SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3606 SelectionDAG &DAG) const {
3607 EVT VT = Op.getValueType();
3608 SDLoc DL(Op);
3609 SDValue Ops[2];
3610 if (is32Bit(VT))
3611 // Just do a normal 64-bit multiplication and extract the results.
3612 // We define this so that it can be used for constant division.
3613 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3614 Op.getOperand(1), Ops[1], Ops[0]);
3615 else
3616 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3617 // the high result in the even register. ISD::UMUL_LOHI is defined to
3618 // return the low half first, so the results are in reverse order.
3619 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3620 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3621 return DAG.getMergeValues(Ops, DL);
3622}
3623
3624SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3625 SelectionDAG &DAG) const {
3626 SDValue Op0 = Op.getOperand(0);
3627 SDValue Op1 = Op.getOperand(1);
3628 EVT VT = Op.getValueType();
3629 SDLoc DL(Op);
3630
3631 // We use DSGF for 32-bit division. This means the first operand must
3632 // always be 64-bit, and the second operand should be 32-bit whenever
3633 // that is possible, to improve performance.
3634 if (is32Bit(VT))
3635 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3636 else if (DAG.ComputeNumSignBits(Op1) > 32)
3637 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3638
3639 // DSG(F) returns the remainder in the even register and the
3640 // quotient in the odd register.
3641 SDValue Ops[2];
3642 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
3643 return DAG.getMergeValues(Ops, DL);
3644}
3645
3646SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3647 SelectionDAG &DAG) const {
3648 EVT VT = Op.getValueType();
3649 SDLoc DL(Op);
3650
3651 // DL(G) returns the remainder in the even register and the
3652 // quotient in the odd register.
3653 SDValue Ops[2];
3654 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
3655 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3656 return DAG.getMergeValues(Ops, DL);
3657}
3658
3659SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3660 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3661
3662 // Get the known-zero masks for each operand.
3663 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
3664 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
3665 DAG.computeKnownBits(Ops[1])};
3666
3667 // See if the upper 32 bits of one operand and the lower 32 bits of the
3668 // other are known zero. They are the low and high operands respectively.
3669 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
3670 Known[1].Zero.getZExtValue() };
3671 unsigned High, Low;
3672 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3673 High = 1, Low = 0;
3674 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3675 High = 0, Low = 1;
3676 else
3677 return Op;
3678
3679 SDValue LowOp = Ops[Low];
3680 SDValue HighOp = Ops[High];
3681
3682 // If the high part is a constant, we're better off using IILH.
3683 if (HighOp.getOpcode() == ISD::Constant)
3684 return Op;
3685
3686 // If the low part is a constant that is outside the range of LHI,
3687 // then we're better off using IILF.
3688 if (LowOp.getOpcode() == ISD::Constant) {
3689 int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3690 if (!isInt<16>(Value))
3691 return Op;
3692 }
3693
3694 // Check whether the high part is an AND that doesn't change the
3695 // high 32 bits and just masks out low bits. We can skip it if so.
3696 if (HighOp.getOpcode() == ISD::AND &&
3697 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3698 SDValue HighOp0 = HighOp.getOperand(0);
3699 uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3700 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3701 HighOp = HighOp0;
3702 }
3703
3704 // Take advantage of the fact that all GR32 operations only change the
3705 // low 32 bits by truncating Low to an i32 and inserting it directly
3706 // using a subreg. The interesting cases are those where the truncation
3707 // can be folded.
3708 SDLoc DL(Op);
3709 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3710 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3711 MVT::i64, HighOp, Low32);
3712}
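// Example of the subreg trick (illustrative operands): for
//   (y & 0xffffffff00000000) | (zext i32 %x to i64)
// the known-zero masks make the AND the high operand and the zext the low
// operand; the AND only clears low bits, so it is skipped entirely and the
// OR becomes an insertion of %x into the low 32 bits of y via subreg_l32.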
3713
3714// Lower SADDO/SSUBO/UADDO/USUBO nodes.
3715SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
3716 SelectionDAG &DAG) const {
3717 SDNode *N = Op.getNode();
3718 SDValue LHS = N->getOperand(0);
3719 SDValue RHS = N->getOperand(1);
3720 SDLoc DL(N);
3721 unsigned BaseOp = 0;
3722 unsigned CCValid = 0;
3723 unsigned CCMask = 0;
3724
3725 switch (Op.getOpcode()) {
3726 default: llvm_unreachable("Unknown instruction!");
3727 case ISD::SADDO:
3728 BaseOp = SystemZISD::SADDO;
3729 CCValid = SystemZ::CCMASK_ARITH;
3730 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3731 break;
3732 case ISD::SSUBO:
3733 BaseOp = SystemZISD::SSUBO;
3734 CCValid = SystemZ::CCMASK_ARITH;
3735 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3736 break;
3737 case ISD::UADDO:
3738 BaseOp = SystemZISD::UADDO;
3739 CCValid = SystemZ::CCMASK_LOGICAL;
3740 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3741 break;
3742 case ISD::USUBO:
3743 BaseOp = SystemZISD::USUBO;
3744 CCValid = SystemZ::CCMASK_LOGICAL;
3745 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3746 break;
3747 }
3748
3749 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
3750 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
3751
3752 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3753 if (N->getValueType(1) == MVT::i1)
3754 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3755
3756 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3757}
3758
3759static bool isAddCarryChain(SDValue Carry) {
3760 while (Carry.getOpcode() == ISD::ADDCARRY)
3761 Carry = Carry.getOperand(2);
3762 return Carry.getOpcode() == ISD::UADDO;
3763}
3764
3765static bool isSubBorrowChain(SDValue Carry) {
3766 while (Carry.getOpcode() == ISD::SUBCARRY)
3767 Carry = Carry.getOperand(2);
3768 return Carry.getOpcode() == ISD::USUBO;
3769}
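// These walkers matter because SystemZ keeps the carry or borrow in CC: an
// ADDCARRY/SUBCARRY chain is lowered directly only when it bottoms out in a
// UADDO/USUBO that actually set CC, e.g. a 128-bit addition split as
// UADDO on the low halves followed by ADDCARRY on the high halves.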
3770
3771// Lower ADDCARRY/SUBCARRY nodes.
3772SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
3773 SelectionDAG &DAG) const {
3774
3775 SDNode *N = Op.getNode();
3776 MVT VT = N->getSimpleValueType(0);
3777
3778 // Let legalize expand this if it isn't a legal type yet.
3779 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3780 return SDValue();
3781
3782 SDValue LHS = N->getOperand(0);
3783 SDValue RHS = N->getOperand(1);
3784 SDValue Carry = Op.getOperand(2);
3785 SDLoc DL(N);
3786 unsigned BaseOp = 0;
3787 unsigned CCValid = 0;
3788 unsigned CCMask = 0;
3789
3790 switch (Op.getOpcode()) {
3791 default: llvm_unreachable("Unknown instruction!");
3792 case ISD::ADDCARRY:
3793 if (!isAddCarryChain(Carry))
3794 return SDValue();
3795
3796 BaseOp = SystemZISD::ADDCARRY;
3797 CCValid = SystemZ::CCMASK_LOGICAL;
3798 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3799 break;
3800 case ISD::SUBCARRY:
3801 if (!isSubBorrowChain(Carry))
3802 return SDValue();
3803
3804 BaseOp = SystemZISD::SUBCARRY;
3805 CCValid = SystemZ::CCMASK_LOGICAL;
3806 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3807 break;
3808 }
3809
3810 // Set the condition code from the carry flag.
3811 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
3812 DAG.getConstant(CCValid, DL, MVT::i32),
3813 DAG.getConstant(CCMask, DL, MVT::i32));
3814
3815 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3816 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
3817
3818 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3819 if (N->getValueType(1) == MVT::i1)
3820 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3821
3822 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3823}
3824
3825SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
3826 SelectionDAG &DAG) const {
3827 EVT VT = Op.getValueType();
3828 SDLoc DL(Op);
3829 Op = Op.getOperand(0);
3830
3831 // Handle vector types via VPOPCT.
3832 if (VT.isVector()) {
3833 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
3834 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
3835 switch (VT.getScalarSizeInBits()) {
3836 case 8:
3837 break;
3838 case 16: {
3839 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
3840 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
3841 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
3842 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3843 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
3844 break;
3845 }
3846 case 32: {
3847 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
3848 DAG.getConstant(0, DL, MVT::i32));
3849 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3850 break;
3851 }
3852 case 64: {
3853 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
3854 DAG.getConstant(0, DL, MVT::i32));
3855 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
3856 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3857 break;
3858 }
3859 default:
3860 llvm_unreachable("Unexpected type")::llvm::llvm_unreachable_internal("Unexpected type", "/build/llvm-toolchain-snapshot-14~++20210926122410+d23fd8ae8906/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp"
, 3860)
;
3861 }
3862 return Op;
3863 }
3864
3865 // Get the known-zero mask for the operand.
3866 KnownBits Known = DAG.computeKnownBits(Op);
3867 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
3868 if (NumSignificantBits == 0)
3869 return DAG.getConstant(0, DL, VT);
3870
3871 // Skip known-zero high parts of the operand.
3872 int64_t OrigBitSize = VT.getSizeInBits();
3873 int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
3874 BitSize = std::min(BitSize, OrigBitSize);
3875
3876 // The POPCNT instruction counts the number of bits in each byte.
3877 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
3878 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
3879 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
3880
3881 // Add up per-byte counts in a binary tree. All bits of Op at
3882 // position larger than BitSize remain zero throughout.
3883 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
3884 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
3885 if (BitSize != OrigBitSize)
3886 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
3887 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
3888 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3889 }
3890
3891 // Extract overall result from high byte.
3892 if (BitSize > 8)
3893 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
3894 DAG.getConstant(BitSize - 8, DL, VT));
3895
3896 return Op;
3897}
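// Worked example (illustrative, not part of the original file): for an i32
// input whose per-byte POPCNT counts are <c3, c2, c1, c0> (c3 in the most
// significant byte), the loop above runs with I = 16 and then I = 8:
//   I = 16: Op += Op << 16  ->  <c3+c1, c2+c0, c1, c0>
//   I = 8:  Op += Op << 8   ->  <c3+c1+c2+c0, ...>
// so the final SRL by BitSize - 8 = 24 extracts the total from the top byte.
// The AND mask only applies when BitSize was narrowed below OrigBitSize,
// keeping the bits at positions >= BitSize zero as the loop requires.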
3898
3899SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3900 SelectionDAG &DAG) const {
3901 SDLoc DL(Op);
3902 AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
3903 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
3904 SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
3905 cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
3906
3907 // The only fence that needs an instruction is a sequentially-consistent
3908 // cross-thread fence.
3909 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
3910 FenceSSID == SyncScope::System) {
3911 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
3912 Op.getOperand(0)),
3913 0);
3914 }
3915
3916 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3917 return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3918}
3919
3920// Op is an atomic load. Lower it into a normal volatile load.
3921SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
3922 SelectionDAG &DAG) const {
3923 auto *Node = cast<AtomicSDNode>(Op.getNode());
3924 return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
3925 Node->getChain(), Node->getBasePtr(),
3926 Node->getMemoryVT(), Node->getMemOperand());
3927}
3928
3929// Op is an atomic store. Lower it into a normal volatile store.
3930SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
3931 SelectionDAG &DAG) const {
3932 auto *Node = cast<AtomicSDNode>(Op.getNode());
3933 SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
3934 Node->getBasePtr(), Node->getMemoryVT(),
3935 Node->getMemOperand());
3936 // We have to enforce sequential consistency by performing a
3937 // serialization operation after the store.
3938 if (Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)
3939 Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
3940 MVT::Other, Chain), 0);
3941 return Chain;
3942}
3943
3944 // Op is an 8-, 16- or 32-bit ATOMIC_LOAD_* operation. Lower the first
3945// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
3946SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
3947 SelectionDAG &DAG,
3948 unsigned Opcode) const {
3949 auto *Node = cast<AtomicSDNode>(Op.getNode());
3950
3951 // 32-bit operations need no code outside the main loop.
3952 EVT NarrowVT = Node->getMemoryVT();
3953 EVT WideVT = MVT::i32;
3954 if (NarrowVT == WideVT)
3955 return Op;
3956
3957 int64_t BitSize = NarrowVT.getSizeInBits();
3958 SDValue ChainIn = Node->getChain();
3959 SDValue Addr = Node->getBasePtr();
3960 SDValue Src2 = Node->getVal();
3961 MachineMemOperand *MMO = Node->getMemOperand();
3962 SDLoc DL(Node);
3963 EVT PtrVT = Addr.getValueType();
3964
3965 // Convert atomic subtracts of constants into additions.
3966 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
3967 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
3968 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
3969 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
3970 }
3971
3972 // Get the address of the containing word.
3973 SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3974 DAG.getConstant(-4, DL, PtrVT));
3975
3976 // Get the number of bits that the word must be rotated left in order
3977 // to bring the field to the top bits of a GR32.
3978 SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3979 DAG.getConstant(3, DL, PtrVT));
3980 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3981
3982 // Get the complementing shift amount, for rotating a field in the top
3983 // bits back to its proper position.
3984 SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3985 DAG.getConstant(0, DL, WideVT), BitShift);
3986
3987 // Extend the source operand to 32 bits and prepare it for the inner loop.
3988 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
3989 // operations require the source to be shifted in advance. (This shift
3990 // can be folded if the source is constant.) For AND and NAND, the lower
3991 // bits must be set, while for other opcodes they should be left clear.
3992 if (Opcode != SystemZISD::ATOMIC_SWAPW)
3993 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
3994 DAG.getConstant(32 - BitSize, DL, WideVT));
3995 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
3996 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
3997 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
3998 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
3999
4000 // Construct the ATOMIC_LOADW_* node.
4001 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
4002 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4003 DAG.getConstant(BitSize, DL, WideVT) };
4004 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4005 NarrowVT, MMO);
4006
4007 // Rotate the result of the final CS so that the field is in the lower
4008 // bits of a GR32, then truncate it.
4009 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
4010 DAG.getConstant(BitSize, DL, WideVT));
4011 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
4012
4013 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
4014 return DAG.getMergeValues(RetOps, DL);
4015}
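// Worked example (illustrative): for a 16-bit operation at address 0x1006,
// AlignedAddr is 0x1004 and BitShift is (0x1006 * 8) & 31 = 16. On
// big-endian SystemZ the halfword at 0x1006 occupies bits 15..0 of the
// aligned word, so rotating left by 16 brings it to bits 31..16, where the
// pre-shifted Src2 (shifted left by 32 - BitSize = 16) lines up with it.
// NegBitShift = -16 rotates the updated field back into place.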
4016
4017// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations
4018// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
4019// operations into additions.
4020SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
4021 SelectionDAG &DAG) const {
4022 auto *Node = cast<AtomicSDNode>(Op.getNode());
4023 EVT MemVT = Node->getMemoryVT();
4024 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
4025 // A full-width operation.
4026 assert(Op.getValueType() == MemVT && "Mismatched VTs");
4027 SDValue Src2 = Node->getVal();
4028 SDValue NegSrc2;
4029 SDLoc DL(Src2);
4030
4031 if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
4032 // Use an addition if the operand is constant and either LAA(G) is
4033 // available or the negative value is in the range of A(G)FHI.
4034 int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
4035 if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
4036 NegSrc2 = DAG.getConstant(Value, DL, MemVT);
4037 } else if (Subtarget.hasInterlockedAccess1())
4038 // Use LAA(G) if available.
4039 NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
4040 Src2);
4041
4042 if (NegSrc2.getNode())
4043 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
4044 Node->getChain(), Node->getBasePtr(), NegSrc2,
4045 Node->getMemOperand());
4046
4047 // Use the node as-is.
4048 return Op;
4049 }
4050
4051 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
4052}
4053
4054// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
4055SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
4056 SelectionDAG &DAG) const {
4057 auto *Node = cast<AtomicSDNode>(Op.getNode());
4058 SDValue ChainIn = Node->getOperand(0);
4059 SDValue Addr = Node->getOperand(1);
4060 SDValue CmpVal = Node->getOperand(2);
4061 SDValue SwapVal = Node->getOperand(3);
4062 MachineMemOperand *MMO = Node->getMemOperand();
4063 SDLoc DL(Node);
4064
4065 // We have native support for 32-bit and 64-bit compare and swap, but we
4066 // still need to expand extracting the "success" result from the CC.
4067 EVT NarrowVT = Node->getMemoryVT();
4068 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4069 if (NarrowVT == WideVT) {
4070 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4071 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
4072 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
4073 DL, Tys, Ops, NarrowVT, MMO);
4074 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4075 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
4076
4077 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4078 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4079 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4080 return SDValue();
4081 }
4082
4083 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4084 // via a fullword ATOMIC_CMP_SWAPW operation.
4085 int64_t BitSize = NarrowVT.getSizeInBits();
4086 EVT PtrVT = Addr.getValueType();
4087
4088 // Get the address of the containing word.
4089 SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4090 DAG.getConstant(-4, DL, PtrVT));
4091
4092 // Get the number of bits that the word must be rotated left in order
4093 // to bring the field to the top bits of a GR32.
4094 SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4095 DAG.getConstant(3, DL, PtrVT));
4096 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4097
4098 // Get the complementing shift amount, for rotating a field in the top
4099 // bits back to its proper position.
4100 SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4101 DAG.getConstant(0, DL, WideVT), BitShift);
4102
4103 // Construct the ATOMIC_CMP_SWAPW node.
4104 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4105 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
4106 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
4107 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
4108 VTList, Ops, NarrowVT, MMO);
4109 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4110 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
4111
4112 // emitAtomicCmpSwapW() will zero extend the result (original value).
4113 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
4114 DAG.getValueType(NarrowVT));
4115 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
4116 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4117 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4118 return SDValue();
4119}
4120
4121MachineMemOperand::Flags
4122SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
4123 // Because of how we convert atomic_load and atomic_store to normal loads and
4124 // stores in the DAG, we need to ensure that the MMOs are marked volatile
4125 // since DAGCombine hasn't been updated to account for atomic but
4126 // non-volatile loads. (See D57601)
4127 if (auto *SI = dyn_cast<StoreInst>(&I))
4128 if (SI->isAtomic())
4129 return MachineMemOperand::MOVolatile;
4130 if (auto *LI = dyn_cast<LoadInst>(&I))
4131 if (LI->isAtomic())
4132 return MachineMemOperand::MOVolatile;
4133 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
4134 if (AI->isAtomic())
4135 return MachineMemOperand::MOVolatile;
4136 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
4137 if (AI->isAtomic())
4138 return MachineMemOperand::MOVolatile;
4139 return MachineMemOperand::MONone;
4140}
4141
4142SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
4143 SelectionDAG &DAG) const {
4144 MachineFunction &MF = DAG.getMachineFunction();
4145 const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
4146 auto *Regs = Subtarget->getSpecialRegisters();
4147 MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
4148 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4149 report_fatal_error("Variable-sized stack allocations are not supported "
4150 "in GHC calling convention");
4151 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
4152 Regs->getStackPointerRegister(), Op.getValueType());
4153}
4154
4155SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
4156 SelectionDAG &DAG) const {
4157 MachineFunction &MF = DAG.getMachineFunction();
4158 const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
4159 auto *Regs = Subtarget->getSpecialRegisters();
4160 MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
4161 bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
4162
4163 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4164 report_fatal_error("Variable-sized stack allocations are not supported "
4165 "in GHC calling convention");
4166
4167 SDValue Chain = Op.getOperand(0);
4168 SDValue NewSP = Op.getOperand(1);
4169 SDValue Backchain;
4170 SDLoc DL(Op);
4171
4172 if (StoreBackchain) {
4173 SDValue OldSP = DAG.getCopyFromReg(
4174 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
4175 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4176 MachinePointerInfo());
4177 }
4178
4179 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
4180
4181 if (StoreBackchain)
4182 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4183 MachinePointerInfo());
4184
4185 return Chain;
4186}
4187
4188SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
4189 SelectionDAG &DAG) const {
4190 bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
4191 if (!IsData)
4192 // Just preserve the chain.
4193 return Op.getOperand(0);
4194
4195 SDLoc DL(Op);
4196 bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
4197 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
4198 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
4199 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
4200 Op.getOperand(1)};
4201 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
4202 Node->getVTList(), Ops,
4203 Node->getMemoryVT(), Node->getMemOperand());
4204}
4205
4206// Convert condition code in CCReg to an i32 value.
4207static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
4208 SDLoc DL(CCReg);
4209 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
4210 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
4211 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
4212}
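// Illustrative: IPM stores the 2-bit condition code in bits 29..28 of its
// target register, so shifting right by SystemZ::IPM_CC (28) leaves the raw
// CC value 0..3 in the i32; the program-mask bits below it are shifted out.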
4213
4214SDValue
4215SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4216 SelectionDAG &DAG) const {
4217 unsigned Opcode, CCValid;
4218 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
4219 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
4220 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
4221 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
4222 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
4223 return SDValue();
4224 }
4225
4226 return SDValue();
4227}
4228
4229SDValue
4230SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4231 SelectionDAG &DAG) const {
4232 unsigned Opcode, CCValid;
4233 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
4234 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
4235 if (Op->getNumValues() == 1)
4236 return getCCResult(DAG, SDValue(Node, 0));
4237 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
4238 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
4239 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
4240 }
4241
4242 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4243 switch (Id) {
4244 case Intrinsic::thread_pointer:
4245 return lowerThreadPointer(SDLoc(Op), DAG);
4246
4247 case Intrinsic::s390_vpdi:
4248 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
4249 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4250
4251 case Intrinsic::s390_vperm:
4252 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
4253 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4254
4255 case Intrinsic::s390_vuphb:
4256 case Intrinsic::s390_vuphh:
4257 case Intrinsic::s390_vuphf:
4258 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
4259 Op.getOperand(1));
4260
4261 case Intrinsic::s390_vuplhb:
4262 case Intrinsic::s390_vuplhh:
4263 case Intrinsic::s390_vuplhf:
4264 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
4265 Op.getOperand(1));
4266
4267 case Intrinsic::s390_vuplb:
4268 case Intrinsic::s390_vuplhw:
4269 case Intrinsic::s390_vuplf:
4270 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
4271 Op.getOperand(1));
4272
4273 case Intrinsic::s390_vupllb:
4274 case Intrinsic::s390_vupllh:
4275 case Intrinsic::s390_vupllf:
4276 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
4277 Op.getOperand(1));
4278
4279 case Intrinsic::s390_vsumb:
4280 case Intrinsic::s390_vsumh:
4281 case Intrinsic::s390_vsumgh:
4282 case Intrinsic::s390_vsumgf:
4283 case Intrinsic::s390_vsumqf:
4284 case Intrinsic::s390_vsumqg:
4285 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
4286 Op.getOperand(1), Op.getOperand(2));
4287 }
4288
4289 return SDValue();
4290}
4291
4292namespace {
4293// Says that SystemZISD operation Opcode can be used to perform the equivalent
4294// of a VPERM with permute vector Bytes. If Opcode takes three operands,
4295// Operand is the constant third operand, otherwise it is the number of
4296// bytes in each element of the result.
4297struct Permute {
4298 unsigned Opcode;
4299 unsigned Operand;
4300 unsigned char Bytes[SystemZ::VectorBytes];
4301};
4302}
4303
4304static const Permute PermuteForms[] = {
4305 // VMRHG
4306 { SystemZISD::MERGE_HIGH, 8,
4307 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4308 // VMRHF
4309 { SystemZISD::MERGE_HIGH, 4,
4310 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4311 // VMRHH
4312 { SystemZISD::MERGE_HIGH, 2,
4313 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4314 // VMRHB
4315 { SystemZISD::MERGE_HIGH, 1,
4316 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4317 // VMRLG
4318 { SystemZISD::MERGE_LOW, 8,
4319 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4320 // VMRLF
4321 { SystemZISD::MERGE_LOW, 4,
4322 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4323 // VMRLH
4324 { SystemZISD::MERGE_LOW, 2,
4325 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4326 // VMRLB
4327 { SystemZISD::MERGE_LOW, 1,
4328 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4329 // VPKG
4330 { SystemZISD::PACK, 4,
4331 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4332 // VPKF
4333 { SystemZISD::PACK, 2,
4334 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4335 // VPKH
4336 { SystemZISD::PACK, 1,
4337 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4338 // VPDI V1, V2, 4 (low half of V1, high half of V2)
4339 { SystemZISD::PERMUTE_DWORDS, 4,
4340 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4341 // VPDI V1, V2, 1 (high half of V1, low half of V2)
4342 { SystemZISD::PERMUTE_DWORDS, 1,
4343 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4344};
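// Illustrative: the VMRHG entry says that a byte-level mask of
// <0..7, 16..23> -- the high doubleword of operand 0 followed by the high
// doubleword of operand 1 -- is exactly a merge-high with 8-byte elements,
// so the matching code below can emit a single VMRHG instead of a general
// VPERM.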
4345
4346// Called after matching a vector shuffle against a particular pattern.
4347// Both the original shuffle and the pattern have two vector operands.
4348// OpNos[0] is the operand of the original shuffle that should be used for
4349// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4350// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
4351// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4352// for operands 0 and 1 of the pattern.
4353static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
4354 if (OpNos[0] < 0) {
4355 if (OpNos[1] < 0)
4356 return false;
4357 OpNo0 = OpNo1 = OpNos[1];
4358 } else if (OpNos[1] < 0) {
4359 OpNo0 = OpNo1 = OpNos[0];
4360 } else {
4361 OpNo0 = OpNos[0];
4362 OpNo1 = OpNos[1];
4363 }
4364 return true;
4365}
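// Illustrative: OpNos = {-1, 0} means pattern operand 0 was unconstrained
// while pattern operand 1 must be shuffle operand 0, so both OpNo0 and
// OpNo1 resolve to 0 and the node is built with a duplicated operand.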
4366
4367// Bytes is a VPERM-like permute vector, except that -1 is used for
4368// undefined bytes. Return true if the VPERM can be implemented using P.
4369// When returning true set OpNo0 to the VPERM operand that should be
4370// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
4371//
4372// For example, if swapping the VPERM operands allows P to match, OpNo0
4373// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
4374// operand, but rewriting it to use two duplicated operands allows it to
4375// match P, then OpNo0 and OpNo1 will be the same.
4376static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
4377 unsigned &OpNo0, unsigned &OpNo1) {
4378 int OpNos[] = { -1, -1 };
4379 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4380 int Elt = Bytes[I];
4381 if (Elt >= 0) {
4382 // Make sure that the two permute vectors use the same suboperand
4383 // byte number. Only the operand numbers (the high bits) are
4384 // allowed to differ.
4385 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
4386 return false;
4387 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
4388 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
4389 // Make sure that the operand mappings are consistent with previous
4390 // elements.
4391 if (OpNos[ModelOpNo] == 1 - RealOpNo)
4392 return false;
4393 OpNos[ModelOpNo] = RealOpNo;
4394 }
4395 }
4396 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4397}
4398
4399// As above, but search for a matching permute.
4400static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
4401 unsigned &OpNo0, unsigned &OpNo1) {
4402 for (auto &P : PermuteForms)
4403 if (matchPermute(Bytes, P, OpNo0, OpNo1))
4404 return &P;
4405 return nullptr;
4406}
4407
4408// Bytes is a VPERM-like permute vector, except that -1 is used for
4409// undefined bytes. This permute is an operand of an outer permute.
4410// See whether redistributing the -1 bytes gives a shuffle that can be
4411// implemented using P. If so, set Transform to a VPERM-like permute vector
4412// that, when applied to the result of P, gives the original permute in Bytes.
4413static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4414 const Permute &P,
4415 SmallVectorImpl<int> &Transform) {
4416 unsigned To = 0;
4417 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
4418 int Elt = Bytes[From];
4419 if (Elt < 0)
4420 // Byte number From of the result is undefined.
4421 Transform[From] = -1;
4422 else {
4423 while (P.Bytes[To] != Elt) {
4424 To += 1;
4425 if (To == SystemZ::VectorBytes)
4426 return false;
4427 }
4428 Transform[From] = To;
4429 }
4430 }
4431 return true;
4432}
4433
4434// As above, but search for a matching permute.
4435static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4436 SmallVectorImpl<int> &Transform) {
4437 for (auto &P : PermuteForms)
4438 if (matchDoublePermute(Bytes, P, Transform))
4439 return &P;
4440 return nullptr;
4441}
4442
4443// Convert the mask of the given shuffle op into a byte-level mask,
4444// as if it had type vNi8.
4445static bool getVPermMask(SDValue ShuffleOp,
4446 SmallVectorImpl<int> &Bytes) {
4447 EVT VT = ShuffleOp.getValueType();
4448 unsigned NumElements = VT.getVectorNumElements();
4449 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4450
4451 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
4452 Bytes.resize(NumElements * BytesPerElement, -1);
4453 for (unsigned I = 0; I < NumElements; ++I) {
4454 int Index = VSN->getMaskElt(I);
4455 if (Index >= 0)
4456 for (unsigned J = 0; J < BytesPerElement; ++J)
4457 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4458 }
4459 return true;
4460 }
4461 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
4462 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
4463 unsigned Index = ShuffleOp.getConstantOperandVal(1);
4464 Bytes.resize(NumElements * BytesPerElement, -1);
4465 for (unsigned I = 0; I < NumElements; ++I)
4466 for (unsigned J = 0; J < BytesPerElement; ++J)
4467 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4468 return true;
4469 }
4470 return false;
4471}
4472
4473// Bytes is a VPERM-like permute vector, except that -1 is used for
4474// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
4475// the result come from a contiguous sequence of bytes from one input.
4476// Set Base to the selector for the first byte if so.
4477static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
4478 unsigned BytesPerElement, int &Base) {
4479 Base = -1;
4480 for (unsigned I = 0; I < BytesPerElement; ++I) {
4481 if (Bytes[Start + I] >= 0) {
4482 unsigned Elem = Bytes[Start + I];
4483 if (Base < 0) {
4484 Base = Elem - I;
4485 // Make sure the bytes would come from one input operand.
4486 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
4487 return false;
4488 } else if (unsigned(Base) != Elem - I)
4489 return false;
4490 }
4491 }
4492 return true;
4493}
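// Illustrative: with BytesPerElement = 4, bytes {20, 21, 22, 23} all come
// from one contiguous word of operand 1, so Base is set to 20; a sequence
// like {20, 21, 30, 31} is rejected because byte 30 is not contiguous with
// Base.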
4494
4495// Bytes is a VPERM-like permute vector, except that -1 is used for
4496// undefined bytes. Return true if it can be performed using VSLDB.
4497// When returning true, set StartIndex to the shift amount and OpNo0
4498// and OpNo1 to the VPERM operands that should be used as the first
4499// and second shift operand respectively.
4500static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
4501 unsigned &StartIndex, unsigned &OpNo0,
4502 unsigned &OpNo1) {
4503 int OpNos[] = { -1, -1 };
4504 int Shift = -1;
4505 for (unsigned I = 0; I < 16; ++I) {
4506 int Index = Bytes[I];
4507 if (Index >= 0) {
4508 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
4509 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
4510 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
4511 if (Shift < 0)
4512 Shift = ExpectedShift;
4513 else if (Shift != ExpectedShift)
4514 return false;
4515 // Make sure that the operand mappings are consistent with previous
4516 // elements.
4517 if (OpNos[ModelOpNo] == 1 - RealOpNo)
4518 return false;
4519 OpNos[ModelOpNo] = RealOpNo;
4520 }
4521 }
4522 StartIndex = Shift;
4523 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4524}
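// Illustrative: Bytes[I] = I + 3 for all I gives a constant ExpectedShift
// of 3 for every byte, with bytes 3..15 taken from operand 0 and bytes
// 16..18 from operand 1, so the mask matches a VSLDB with StartIndex 3.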
4525
4526// Create a node that performs P on operands Op0 and Op1, casting the
4527// operands to the appropriate type. The type of the result is determined by P.
4528static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4529 const Permute &P, SDValue Op0, SDValue Op1) {
4530 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
4531 // elements of a PACK are twice as wide as the outputs.
4532 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
4533 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
4534 P.Operand);
4535 // Cast both operands to the appropriate type.
4536 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
4537 SystemZ::VectorBytes / InBytes);
4538 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
4539 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
4540 SDValue Op;
4541 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
4542 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
4543 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
4544 } else if (P.Opcode == SystemZISD::PACK) {
4545 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
4546 SystemZ::VectorBytes / P.Operand);
4547 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
4548 } else {
4549 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
4550 }
4551 return Op;
4552}
4553
4554static bool isZeroVector(SDValue N) {
4555 if (N->getOpcode() == ISD::BITCAST)
4556 N = N->getOperand(0);
4557 if (N->getOpcode() == ISD::SPLAT_VECTOR)
4558 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
4559 return Op->getZExtValue() == 0;
4560 return ISD::isBuildVectorAllZeros(N.getNode());
4561}
4562
4563// Return the index of the zero/undef vector, or UINT32_MAX if not found.
4564static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
4565 for (unsigned I = 0; I < Num ; I++)
4566 if (isZeroVector(Ops[I]))
4567 return I;
4568 return UINT32_MAX;
4569}
4570
4571// Bytes is a VPERM-like permute vector, except that -1 is used for
4572// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
4573// VSLDB or VPERM.
4574static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4575 SDValue *Ops,
4576 const SmallVectorImpl<int> &Bytes) {
4577 for (unsigned I = 0; I < 2; ++I)
4578 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
4579
4580 // First see whether VSLDB can be used.
4581 unsigned StartIndex, OpNo0, OpNo1;
4582 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
4583 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
4584 Ops[OpNo1],
4585 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
4586
4587 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
4588 // eliminate a zero vector by reusing any zero index in the permute vector.
4589 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
4590 if (ZeroVecIdx != UINT32_MAX) {
4591 bool MaskFirst = true;
4592 int ZeroIdx = -1;
4593 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4594 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
4595 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
4596 if (OpNo == ZeroVecIdx && I == 0) {
4597 // If the first byte is zero, use mask as first operand.
4598 ZeroIdx = 0;
4599 break;
4600 }
4601 if (OpNo != ZeroVecIdx && Byte == 0) {
4602 // If mask contains a zero, use it by placing that vector first.
4603 ZeroIdx = I + SystemZ::VectorBytes;
4604 MaskFirst = false;
4605 break;
4606 }
4607 }
4608 if (ZeroIdx != -1) {
4609 SDValue IndexNodes[SystemZ::VectorBytes];
4610 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4611 if (Bytes[I] >= 0) {
4612 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
4613 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
4614 if (OpNo == ZeroVecIdx)
4615 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
4616 else {
4617 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
4618 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
4619 }
4620 } else
4621 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
4622 }
4623 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
4624 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
4625 if (MaskFirst)
4626 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
4627 Mask);
4628 else
4629 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
4630 Mask);
4631 }
4632 }
4633
4634 SDValue IndexNodes[SystemZ::VectorBytes];
4635 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4636 if (Bytes[I] >= 0)
4637 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
4638 else
4639 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
4640 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
4641 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
4642 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
4643}
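// Illustrative: in the zero-vector case above, the permute mask doubles as
// a VPERM input. When the first result byte must be zero, IndexNodes[0] is
// the constant 0, so a selector value of 0 reads that constant byte back
// out of the mask operand and produces the zero without tying up a separate
// zero register.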
4644
4645namespace {
4646// Describes a general N-operand vector shuffle.
4647struct GeneralShuffle {
4648 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
4649 void addUndef();
4650 bool add(SDValue, unsigned);
4651 SDValue getNode(SelectionDAG &, const SDLoc &);
4652 void tryPrepareForUnpack();
4653 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
4654 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
4655
4656 // The operands of the shuffle.
4657 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
4658
4659 // Index I is -1 if byte I of the result is undefined. Otherwise the
4660 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
4661 // Bytes[I] / SystemZ::VectorBytes.
4662 SmallVector<int, SystemZ::VectorBytes> Bytes;
4663
4664 // The type of the shuffle result.
4665 EVT VT;
4666
4667 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
4668 unsigned UnpackFromEltSize;
4669};
4670}
4671
4672// Add an extra undefined element to the shuffle.
4673void GeneralShuffle::addUndef() {
4674 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4675 for (unsigned I = 0; I < BytesPerElement; ++I)
4676 Bytes.push_back(-1);
4677}
4678
4679// Add an extra element to the shuffle, taking it from element Elem of Op.
4680// A null Op indicates a vector input whose value will be calculated later;
4681// there is at most one such input per shuffle and it always has the same
4682// type as the result. Aborts and returns false if the source vector elements
4683// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
4684// LLVM they become implicitly extended, but this is rare and not optimized.
4685bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
4686 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4687
4688 // The source vector can have wider elements than the result,
4689 // either through an explicit TRUNCATE or because of type legalization.
4690 // We want the least significant part.
4691 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
4692 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
4693
4694 // Return false if the source elements are smaller than their destination
4695 // elements.
4696 if (FromBytesPerElement < BytesPerElement)
4697 return false;
4698
4699 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
4700 (FromBytesPerElement - BytesPerElement));
4701
4702 // Look through things like shuffles and bitcasts.
4703 while (Op.getNode()) {
4704 if (Op.getOpcode() == ISD::BITCAST)
4705 Op = Op.getOperand(0);
4706 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
4707 // See whether the bytes we need come from a contiguous part of one
4708 // operand.
4709 SmallVector<int, SystemZ::VectorBytes> OpBytes;
4710 if (!getVPermMask(Op, OpBytes))
4711 break;
4712 int NewByte;
4713 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
4714 break;
4715 if (NewByte < 0) {
4716 addUndef();
4717 return true;
4718 }
4719 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
4720 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
4721 } else if (Op.isUndef()) {
4722 addUndef();
4723 return true;
4724 } else
4725 break;
4726 }
4727
4728 // Make sure that the source of the extraction is in Ops.
4729 unsigned OpNo = 0;
4730 for (; OpNo < Ops.size(); ++OpNo)
4731 if (Ops[OpNo] == Op)
4732 break;
4733 if (OpNo == Ops.size())
4734 Ops.push_back(Op);
4735
4736 // Add the element to Bytes.
4737 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
4738 for (unsigned I = 0; I < BytesPerElement; ++I)
4739 Bytes.push_back(Base + I);
4740
4741 return true;
4742}
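// Illustrative: extracting element 2 of a v4i32 source into a v8i16 result
// gives Byte = (2 * 4) % 16 + (4 - 2) = 10, i.e. the least significant
// halfword of that i32 element on big-endian SystemZ, matching the
// "least significant part" rule in the comment above.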
4743
4744// Return SDNodes for the completed shuffle.
4745SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
4746 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
4747
4748 if (Ops.size() == 0)
4749 return DAG.getUNDEF(VT);
4750
4751 // Use a single unpack if possible as the last operation.
4752 tryPrepareForUnpack();
4753
4754 // Make sure that there are at least two shuffle operands.
4755 if (Ops.size() == 1)
4756 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
4757
4758 // Create a tree of shuffles, deferring root node until after the loop.
4759 // Try to redistribute the undefined elements of non-root nodes so that
4760 // the non-root shuffles match something like a pack or merge, then adjust
4761 // the parent node's permute vector to compensate for the new order.
4762 // Among other things, this copes with vectors like <2 x i16> that were
4763 // padded with undefined elements during type legalization.
4764 //
4765 // In the best case this redistribution will lead to the whole tree
4766 // using packs and merges. It should rarely be a loss in other cases.
4767 unsigned Stride = 1;
4768 for (; Stride * 2 < Ops.size(); Stride *= 2) {
4769 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
4770 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
4771
4772 // Create a mask for just these two operands.
4773 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
4774 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
4775 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
4776 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
4777 if (OpNo == I)
4778 NewBytes[J] = Byte;
4779 else if (OpNo == I + Stride)
4780 NewBytes[J] = SystemZ::VectorBytes + Byte;
4781 else
4782 NewBytes[J] = -1;
4783 }
4784 // See if it would be better to reorganize NewMask to avoid using VPERM.
4785 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
4786 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
4787 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
4788 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
4789 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
4790 if (NewBytes[J] >= 0) {
4791 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
4792 "Invalid double permute");
4793 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
4794 } else
4795 assert(NewBytesMap[J] < 0 && "Invalid double permute");
4796 }
4797 } else {
4798 // Just use NewBytes on the operands.
4799 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
4800 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
4801 if (NewBytes[J] >= 0)
4802 Bytes[J] = I * SystemZ::VectorBytes + J;
4803 }
4804 }
4805 }
4806
4807 // Now we just have 2 inputs. Put the second operand in Ops[1].
4808 if (Stride > 1) {
4809 Ops[1] = Ops[Stride];
4810 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4811 if (Bytes[I] >= int(SystemZ::VectorBytes))
4812 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
4813 }
4814
4815 // Look for an instruction that can do the permute without resorting
4816 // to VPERM.
4817 unsigned OpNo0, OpNo1;
4818 SDValue Op;
4819 if (unpackWasPrepared() && Ops[1].isUndef())
4820 Op = Ops[0];
4821 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
4822 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
4823 else
4824 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
4825
4826 Op = insertUnpackIfPrepared(DAG, DL, Op);
4827
4828 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
4829}
4830
4831#ifndef NDEBUG
4832static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
4833 dbgs() << Msg.c_str() << " { ";
4834 for (unsigned i = 0; i < Bytes.size(); i++)
4835 dbgs() << Bytes[i] << " ";
4836 dbgs() << "}\n";
4837}
4838#endif
4839
4840// If the Bytes vector matches an unpack operation, prepare to do the unpack
4841// after all else by removing the zero vector and the effect of the unpack on
4842// Bytes.
4843void GeneralShuffle::tryPrepareForUnpack() {
4844 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
4845 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
4846 return;
4847
4848 // Only do this if removing the zero vector reduces the depth, otherwise
4849 // the critical path will increase with the final unpack.
4850 if (Ops.size() > 2 &&
4851 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
4852 return;
4853
4854 // Find an unpack that would allow removing the zero vector from Ops.
4855 UnpackFromEltSize = 1;
4856 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
4857 bool MatchUnpack = true;
4858 SmallVector<int, SystemZ::VectorBytes> SrcBytes;
4859 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
4860 unsigned ToEltSize = UnpackFromEltSize * 2;
4861 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
4862 if (!IsZextByte)
4863 SrcBytes.push_back(Bytes[Elt]);
4864 if (Bytes[Elt] != -1) {
4865 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
4866 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
4867 MatchUnpack = false;
4868 break;
4869 }
4870 }
4871 }
4872 if (MatchUnpack) {
4873 if (Ops.size() == 2) {
4874 // Don't use unpack if a single source operand needs rearrangement.
4875 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
4876 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
4877 UnpackFromEltSize = UINT_MAX;
4878 return;
4879 }
4880 }
4881 break;
4882 }
4883 }
4884 if (UnpackFromEltSize > 4)
4885 return;
4886
4887 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
4888 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
4889 << ".\n";
4890 dumpBytes(Bytes, "Original Bytes vector:"););
4892 // Apply the unpack in reverse to the Bytes array.
4893 unsigned B = 0;
4894 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
4895 Elt += UnpackFromEltSize;
4896 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
4897 Bytes[B] = Bytes[Elt];
4898 }
4899 while (B < SystemZ::VectorBytes)
4900 Bytes[B++] = -1;
4901
4902 // Remove the zero vector from Ops.
4903 Ops.erase(&Ops[ZeroVecOpNo]);
4904 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4905 if (Bytes[I] >= 0) {
4906 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
4907 if (OpNo > ZeroVecOpNo)
4908 Bytes[I] -= SystemZ::VectorBytes;
4909 }
4910
4911 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
4912 dbgs() << "\n";);
4913}
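// Illustrative: a zero-extending shuffle whose Bytes vector alternates
// zero-vector selectors with source bytes, e.g. <Z, Z, 0, 1, Z, Z, 2, 3, ...>
// with UnpackFromEltSize = 2, is rewritten to <0, 1, 2, 3, ...> padded with
// -1, and insertUnpackIfPrepared() below reinstates the zero extension with
// a single UNPACKL_HIGH at the end.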
4914
4915SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
4916 const SDLoc &DL,
4917 SDValue Op) {
4918 if (!unpackWasPrepared())
4919 return Op;
4920 unsigned InBits = UnpackFromEltSize * 8;
4921 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
4922 SystemZ::VectorBits / InBits);
4923 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
4924 unsigned OutBits = InBits * 2;
4925 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
4926 SystemZ::VectorBits / OutBits);
4927 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
4928}
4929
4930// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
4931static bool isScalarToVector(SDValue Op) {
4932 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
4933 if (!Op.getOperand(I).isUndef())
4934 return false;
4935 return true;
4936}
4937
4938// Return a vector of type VT that contains Value in the first element.
4939// The other elements don't matter.
4940static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4941 SDValue Value) {
4942 // If we have a constant, replicate it to all elements and let the
4943 // BUILD_VECTOR lowering take care of it.
4944 if (Value.getOpcode() == ISD::Constant ||
4945 Value.getOpcode() == ISD::ConstantFP) {
4946 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
4947 return DAG.getBuildVector(VT, DL, Ops);
4948 }
4949 if (Value.isUndef())
4950 return DAG.getUNDEF(VT);
4951 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
4952}
4953
4954// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
4955// element 1. Used for cases in which replication is cheap.
4956static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
4957 SDValue Op0, SDValue Op1) {
4958 if (Op0.isUndef()) {
4959 if (Op1.isUndef())
4960 return DAG.getUNDEF(VT);
4961 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
4962 }
4963 if (Op1.isUndef())
4964 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
4965 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
4966 buildScalarToVector(DAG, DL, VT, Op0),
4967 buildScalarToVector(DAG, DL, VT, Op1));
4968}
4969
4970// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
4971// vector for them.
4972static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
4973 SDValue Op1) {
4974 if (Op0.isUndef() && Op1.isUndef())
4975 return DAG.getUNDEF(MVT::v2i64);
4976 // If one of the two inputs is undefined then replicate the other one,
4977 // in order to avoid using another register unnecessarily.
4978 if (Op0.isUndef())
4979 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
4980 else if (Op1.isUndef())
4981 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4982 else {
4983 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4984 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
4985 }
4986 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
4987}
4988
4989// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
4990// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
4991// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
4992// would benefit from this representation and return it if so.
4993static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
4994 BuildVectorSDNode *BVN) {
4995 EVT VT = BVN->getValueType(0);
4996 unsigned NumElements = VT.getVectorNumElements();
4997
4998 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
4999 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
5000 // need a BUILD_VECTOR, add an additional placeholder operand for that
5001 // BUILD_VECTOR and store its operands in ResidueOps.
5002 GeneralShuffle GS(VT);
5003 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
5004 bool FoundOne = false;
5005 for (unsigned I = 0; I < NumElements; ++I) {
5006 SDValue Op = BVN->getOperand(I);
5007 if (Op.getOpcode() == ISD::TRUNCATE)
5008 Op = Op.getOperand(0);
5009 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5010 Op.getOperand(1).getOpcode() == ISD::Constant) {
5011 unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
5012 if (!GS.add(Op.getOperand(0), Elem))
5013 return SDValue();
5014 FoundOne = true;
5015 } else if (Op.isUndef()) {
5016 GS.addUndef();
5017 } else {
5018 if (!GS.add(SDValue(), ResidueOps.size()))
5019 return SDValue();
5020 ResidueOps.push_back(BVN->getOperand(I));
5021 }
5022 }
5023
5024 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
5025 if (!FoundOne)
5026 return SDValue();
5027
5028 // Create the BUILD_VECTOR for the remaining elements, if any.
5029 if (!ResidueOps.empty()) {
5030 while (ResidueOps.size() < NumElements)
5031 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
5032 for (auto &Op : GS.Ops) {
5033 if (!Op.getNode()) {
5034 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
5035 break;
5036 }
5037 }
5038 }
5039 return GS.getNode(DAG, SDLoc(BVN));
5040}
5041
5042bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
5043 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
5044 return true;
5045 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
5046 return true;
5047 return false;
5048}
5049
5050// Combine GPR scalar values Elems into a vector of type VT.
5051SDValue
5052SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5053 SmallVectorImpl<SDValue> &Elems) const {
5054 // See whether there is a single replicated value.
5055 SDValue Single;
5056 unsigned int NumElements = Elems.size();
5057 unsigned int Count = 0;
5058 for (auto Elem : Elems) {
5059 if (!Elem.isUndef()) {
5060 if (!Single.getNode())
5061 Single = Elem;
5062 else if (Elem != Single) {
5063 Single = SDValue();
5064 break;
5065 }
5066 Count += 1;
5067 }
5068 }
5069 // There are three cases here:
5070 //
5071 // - if the only defined element is a loaded one, the best sequence
5072 // is a replicating load.
5073 //
5074 // - otherwise, if the only defined element is an i64 value, we will
5075 // end up with the same VLVGP sequence regardless of whether we short-cut
5076 // for replication or fall through to the later code.
5077 //
5078 // - otherwise, if the only defined element is an i32 or smaller value,
5079 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
5080 // This is only a win if the single defined element is used more than once.
5081 // In other cases we're better off using a single VLVGx.
5082 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
5083 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
5084
5085 // If all elements are loads, use VLREP/VLEs (below).
5086 bool AllLoads = true;
5087 for (auto Elem : Elems)
5088 if (!isVectorElementLoad(Elem)) {
5089 AllLoads = false;
5090 break;
5091 }
5092
5093 // The best way of building a v2i64 from two i64s is to use VLVGP.
5094 if (VT == MVT::v2i64 && !AllLoads)
5095 return joinDwords(DAG, DL, Elems[0], Elems[1]);
5096
5097 // Use a 64-bit merge high to combine two doubles.
5098 if (VT == MVT::v2f64 && !AllLoads)
5099 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5100
5101 // Build v4f32 values directly from the FPRs:
5102 //
5103 // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
5104 // V V VMRHF
5105 // <ABxx> <CDxx>
5106 // V VMRHG
5107 // <ABCD>
5108 if (VT == MVT::v4f32 && !AllLoads) {
5109 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5110 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
5111 // Avoid unnecessary undefs by reusing the other operand.
5112 if (Op01.isUndef())
5113 Op01 = Op23;
5114 else if (Op23.isUndef())
5115 Op23 = Op01;
5116 // Merging identical replications is a no-op.
5117 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
5118 return Op01;
5119 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
5120 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
5121 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
5122 DL, MVT::v2i64, Op01, Op23);
5123 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5124 }
5125
5126 // Collect the constant terms.
5127 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
5128 SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
5129
5130 unsigned NumConstants = 0;
5131 for (unsigned I = 0; I < NumElements; ++I) {
5132 SDValue Elem = Elems[I];
5133 if (Elem.getOpcode() == ISD::Constant ||
5134 Elem.getOpcode() == ISD::ConstantFP) {
5135 NumConstants += 1;
5136 Constants[I] = Elem;
5137 Done[I] = true;
5138 }
5139 }
5140 // If there was at least one constant, fill in the other elements of
5141 // Constants with undefs to get a full vector constant and use that
5142 // as the starting point.
5143 SDValue Result;
5144 SDValue ReplicatedVal;
5145 if (NumConstants > 0) {
5146 for (unsigned I = 0; I < NumElements; ++I)
5147 if (!Constants[I].getNode())
5148 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
5149 Result = DAG.getBuildVector(VT, DL, Constants);
5150 } else {
5151 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
5152 // avoid a false dependency on any previous contents of the vector
5153 // register.
5154
5155 // Use a VLREP if at least one element is a load. Make sure to replicate
5156 // the load with the most elements having its value.
5157 std::map<const SDNode*, unsigned> UseCounts;
5158 SDNode *LoadMaxUses = nullptr;
5159 for (unsigned I = 0; I < NumElements; ++I)
5160 if (isVectorElementLoad(Elems[I])) {
5161 SDNode *Ld = Elems[I].getNode();
5162 UseCounts[Ld]++;
5163 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
5164 LoadMaxUses = Ld;
5165 }
5166 if (LoadMaxUses != nullptr) {
5167 ReplicatedVal = SDValue(LoadMaxUses, 0);
5168 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
5169 } else {
5170 // Try to use VLVGP.
5171 unsigned I1 = NumElements / 2 - 1;
5172 unsigned I2 = NumElements - 1;
5173 bool Def1 = !Elems[I1].isUndef();
5174 bool Def2 = !Elems[I2].isUndef();
5175 if (Def1 || Def2) {
5176 SDValue Elem1 = Elems[Def1 ? I1 : I2];
5177 SDValue Elem2 = Elems[Def2 ? I2 : I1];
5178 Result = DAG.getNode(ISD::BITCAST, DL, VT,
5179 joinDwords(DAG, DL, Elem1, Elem2));
5180 Done[I1] = true;
5181 Done[I2] = true;
5182 } else
5183 Result = DAG.getUNDEF(VT);
5184 }
5185 }
5186
5187 // Use VLVGx to insert the other elements.
5188 for (unsigned I = 0; I < NumElements; ++I)
5189 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
5190 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
5191 DAG.getConstant(I, DL, MVT::i32));
5192 return Result;
5193}
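// Illustrative: for v4i32 the VLVGP path above picks I1 = 1 and I2 = 3
// because, after the big-endian bitcast from v2i64, the low half of each
// joined doubleword lands in elements 1 and 3; those two elements are
// marked Done and only the remaining ones need explicit VLVGx inserts.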
5194
5195SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
5196 SelectionDAG &DAG) const {
5197 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
5198 SDLoc DL(Op);
5199 EVT VT = Op.getValueType();
5200
5201 if (BVN->isConstant()) {
5202 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
5203 return Op;
5204
5205 // Fall back to loading it from memory.
5206 return SDValue();
5207 }
5208
5209 // See if we should use shuffles to construct the vector from other vectors.
5210 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
5211 return Res;
5212
5213 // Detect SCALAR_TO_VECTOR conversions.
5214 if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
5215 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
5216
5217 // Otherwise use buildVector to build the vector up from GPRs.
5218 unsigned NumElements = Op.getNumOperands();
5219 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
5220 for (unsigned I = 0; I < NumElements; ++I)
5221 Ops[I] = Op.getOperand(I);
5222 return buildVector(DAG, DL, VT, Ops);
5223}
5224
5225SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
5226 SelectionDAG &DAG) const {
5227 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
5228 SDLoc DL(Op);
5229 EVT VT = Op.getValueType();
5230 unsigned NumElements = VT.getVectorNumElements();
5231
5232 if (VSN->isSplat()) {
5233 SDValue Op0 = Op.getOperand(0);
5234 unsigned Index = VSN->getSplatIndex();
5235 assert(Index < VT.getVectorNumElements() &&
5236 "Splat index should be defined and in first operand");
5237 // See whether the value we're splatting is directly available as a scalar.
5238 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5239 Op0.getOpcode() == ISD::BUILD_VECTOR)
5240 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
5241 // Otherwise keep it as a vector-to-vector operation.
5242 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
5243 DAG.getTargetConstant(Index, DL, MVT::i32));
5244 }
5245
5246 GeneralShuffle GS(VT);
5247 for (unsigned I = 0; I < NumElements; ++I) {
5248 int Elt = VSN->getMaskElt(I);
5249 if (Elt < 0)
5250 GS.addUndef();
5251 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
5252 unsigned(Elt) % NumElements))
5253 return SDValue();
5254 }
5255 return GS.getNode(DAG, SDLoc(VSN));
5256}
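// A short example of the handling above: a v4i32 shuffle with mask
// <1,1,1,1> is a splat of element 1 of the first operand. If that
// operand is a BUILD_VECTOR, its scalar operand 1 is replicated
// directly; otherwise the SystemZISD::SPLAT form replicates the vector
// element. In the GeneralShuffle path, a mask element such as 5 decodes
// to operand 5 / 4 = 1, element 5 % 4 = 1.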
5257
5258SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
5259 SelectionDAG &DAG) const {
5260 SDLoc DL(Op);
5261 // Just insert the scalar into element 0 of an undefined vector.
5262 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
5263 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
5264 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
5265}
5266
5267SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
5268 SelectionDAG &DAG) const {
5269 // Handle insertions of floating-point values.
5270 SDLoc DL(Op);
5271 SDValue Op0 = Op.getOperand(0);
5272 SDValue Op1 = Op.getOperand(1);
5273 SDValue Op2 = Op.getOperand(2);
5274 EVT VT = Op.getValueType();
5275
5276 // Insertions into constant indices of a v2f64 can be done using VPDI.
5277 // However, if the inserted value is a bitcast or a constant then it's
5278 // better to use GPRs, as below.
5279 if (VT == MVT::v2f64 &&
5280 Op1.getOpcode() != ISD::BITCAST &&
5281 Op1.getOpcode() != ISD::ConstantFP &&
5282 Op2.getOpcode() == ISD::Constant) {
5283 uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
5284 unsigned Mask = VT.getVectorNumElements() - 1;
5285 if (Index <= Mask)
5286 return Op;
5287 }
5288
5289 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
5290 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
5291 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
5292 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
5293 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
5294 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
5295 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5296}
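// For example: an f32 insertion into a v4f32 that misses the VPDI case
// above is rewritten as
//   (bitcast v4f32 (insert_vector_elt (bitcast v4i32 Op0),
//                                     (bitcast i32 Op1), Op2))
// so the value is inserted through a GPR rather than an FP register.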
5297
5298SDValue
5299SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
5300 SelectionDAG &DAG) const {
5301 // Handle extractions of floating-point values.
5302 SDLoc DL(Op);
5303 SDValue Op0 = Op.getOperand(0);
5304 SDValue Op1 = Op.getOperand(1);
5305 EVT VT = Op.getValueType();
5306 EVT VecVT = Op0.getValueType();
5307
5308 // Extractions of constant indices can be done directly.
5309 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
5310 uint64_t Index = CIndexN->getZExtValue();
5311 unsigned Mask = VecVT.getVectorNumElements() - 1;
5312 if (Index <= Mask)
5313 return Op;
5314 }
5315
5316 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
5317 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
5318 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
5319 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
5320 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
5321 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5322}
5323
5324SDValue SystemZTargetLowering::
5325lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5326 SDValue PackedOp = Op.getOperand(0);
5327 EVT OutVT = Op.getValueType();
5328 EVT InVT = PackedOp.getValueType();
5329 unsigned ToBits = OutVT.getScalarSizeInBits();
5330 unsigned FromBits = InVT.getScalarSizeInBits();
5331 do {
5332 FromBits *= 2;
5333 EVT NewVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
5334 SystemZ::VectorBits / FromBits);
5335 PackedOp =
5336 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), NewVT, PackedOp);
5337 } while (FromBits != ToBits);
5338 return PackedOp;
5339}
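// For example: sign-extending v16i8 in-register to v4i32 takes two
// trips around the loop, 8 -> 16 bits (UNPACK_HIGH to v8i16) and then
// 16 -> 32 bits (UNPACK_HIGH to v4i32): one unpack per doubling of the
// element width until FromBits reaches ToBits.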
5340
5341// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
5342SDValue SystemZTargetLowering::
5343lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5344 SDValue PackedOp = Op.getOperand(0);
5345 SDLoc DL(Op);
5346 EVT OutVT = Op.getValueType();
5347 EVT InVT = PackedOp.getValueType();
5348 unsigned InNumElts = InVT.getVectorNumElements();
5349 unsigned OutNumElts = OutVT.getVectorNumElements();
5350 unsigned NumInPerOut = InNumElts / OutNumElts;
5351
5352 SDValue ZeroVec =
5353 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
5354
5355 SmallVector<int, 16> Mask(InNumElts);
5356 unsigned ZeroVecElt = InNumElts;
5357 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
5358 unsigned MaskElt = PackedElt * NumInPerOut;
5359 unsigned End = MaskElt + NumInPerOut - 1;
5360 for (; MaskElt < End; MaskElt++)
5361 Mask[MaskElt] = ZeroVecElt++;
5362 Mask[MaskElt] = PackedElt;
5363 }
5364 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
5365 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
5366}
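// A worked example of the mask construction above: zero-extending
// v16i8 in-register to v4i32 gives NumInPerOut = 4 and the (big-endian)
// mask
//   <16,17,18,0, 19,20,21,1, 22,23,24,2, 25,26,27,3>
// where indices >= 16 select zero bytes from ZeroVec, so each packed
// element lands in the least-significant byte of its i32 lane.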
5367
5368SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
5369 unsigned ByScalar) const {
5370 // Look for cases where a vector shift can use the *_BY_SCALAR form.
5371 SDValue Op0 = Op.getOperand(0);
5372 SDValue Op1 = Op.getOperand(1);
5373 SDLoc DL(Op);
5374 EVT VT = Op.getValueType();
5375 unsigned ElemBitSize = VT.getScalarSizeInBits();
5376
5377 // See whether the shift vector is a splat represented as BUILD_VECTOR.
5378 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
5379 APInt SplatBits, SplatUndef;
5380 unsigned SplatBitSize;
5381 bool HasAnyUndefs;
5382 // Check for constant splats. Use ElemBitSize as the minimum element
5383 // width and reject splats that need wider elements.
5384 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
5385 ElemBitSize, true) &&
5386 SplatBitSize == ElemBitSize) {
5387 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
5388 DL, MVT::i32);
5389 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5390 }
5391 // Check for variable splats.
5392 BitVector UndefElements;
5393 SDValue Splat = BVN->getSplatValue(&UndefElements);
5394 if (Splat) {
5395 // Since i32 is the smallest legal type, we either need a no-op
5396 // or a truncation.
5397 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
5398 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5399 }
5400 }
5401
5402 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
5403 // and the shift amount is directly available in a GPR.
5404 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
5405 if (VSN->isSplat()) {
5406 SDValue VSNOp0 = VSN->getOperand(0);
5407 unsigned Index = VSN->getSplatIndex();
5408 assert(Index < VT.getVectorNumElements() &&
5409 "Splat index should be defined and in first operand");
5410 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5411 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
5412 // Since i32 is the smallest legal type, we either need a no-op
5413 // or a truncation.
5414 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
5415 VSNOp0.getOperand(Index));
5416 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5417 }
5418 }
5419 }
5420
5421 // Otherwise just treat the current form as legal.
5422 return Op;
5423}
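// For example: (shl v4i32 X, (build_vector 5, 5, 5, 5)) becomes
// VSHL_BY_SCALAR(X, 5), and a splat of a non-constant amount Y becomes
// VSHL_BY_SCALAR(X, (trunc i32 Y)), so the shift amount is taken from
// an immediate or a GPR instead of being materialized as a vector.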
5424
5425SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
5426 SelectionDAG &DAG) const {
5427 switch (Op.getOpcode()) {
5428 case ISD::FRAMEADDR:
5429 return lowerFRAMEADDR(Op, DAG);
5430 case ISD::RETURNADDR:
5431 return lowerRETURNADDR(Op, DAG);
5432 case ISD::BR_CC:
5433 return lowerBR_CC(Op, DAG);
5434 case ISD::SELECT_CC:
5435 return lowerSELECT_CC(Op, DAG);
5436 case ISD::SETCC:
5437 return lowerSETCC(Op, DAG);
5438 case ISD::STRICT_FSETCC:
5439 return lowerSTRICT_FSETCC(Op, DAG, false);
5440 case ISD::STRICT_FSETCCS:
5441 return lowerSTRICT_FSETCC(Op, DAG, true);
5442 case ISD::GlobalAddress:
5443 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
5444 case ISD::GlobalTLSAddress:
5445 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
5446 case ISD::BlockAddress:
5447 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
5448 case ISD::JumpTable:
5449 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
5450 case ISD::ConstantPool:
5451 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
5452 case ISD::BITCAST:
5453 return lowerBITCAST(Op, DAG);
5454 case ISD::VASTART:
5455 return lowerVASTART(Op, DAG);
5456 case ISD::VACOPY:
5457 return lowerVACOPY(Op, DAG);
5458 case ISD::DYNAMIC_STACKALLOC:
5459 return lowerDYNAMIC_STACKALLOC(Op, DAG);
5460 case ISD::GET_DYNAMIC_AREA_OFFSET:
5461 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
5462 case ISD::SMUL_LOHI:
5463 return lowerSMUL_LOHI(Op, DAG);
5464 case ISD::UMUL_LOHI:
5465 return lowerUMUL_LOHI(Op, DAG);
5466 case ISD::SDIVREM:
5467 return lowerSDIVREM(Op, DAG);
5468 case ISD::UDIVREM:
5469 return lowerUDIVREM(Op, DAG);
5470 case ISD::SADDO:
5471 case ISD::SSUBO:
5472 case ISD::UADDO:
5473 case ISD::USUBO:
5474 return lowerXALUO(Op, DAG);
5475 case ISD::ADDCARRY:
5476 case ISD::SUBCARRY:
5477 return lowerADDSUBCARRY(Op, DAG);
5478 case ISD::OR:
5479 return lowerOR(Op, DAG);
5480 case ISD::CTPOP:
5481 return lowerCTPOP(Op, DAG);
5482 case ISD::ATOMIC_FENCE:
5483 return lowerATOMIC_FENCE(Op, DAG);
5484 case ISD::ATOMIC_SWAP:
5485 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
5486 case ISD::ATOMIC_STORE:
5487 return lowerATOMIC_STORE(Op, DAG);
5488 case ISD::ATOMIC_LOAD:
5489 return lowerATOMIC_LOAD(Op, DAG);
5490 case ISD::ATOMIC_LOAD_ADD:
5491 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
5492 case ISD::ATOMIC_LOAD_SUB:
5493 return lowerATOMIC_LOAD_SUB(Op, DAG);
5494 case ISD::ATOMIC_LOAD_AND:
5495 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
5496 case ISD::ATOMIC_LOAD_OR:
5497 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
5498 case ISD::ATOMIC_LOAD_XOR:
5499 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
5500 case ISD::ATOMIC_LOAD_NAND:
5501 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
5502 case ISD::ATOMIC_LOAD_MIN:
5503 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
5504 case ISD::ATOMIC_LOAD_MAX:
5505 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
5506 case ISD::ATOMIC_LOAD_UMIN:
5507 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
5508 case ISD::ATOMIC_LOAD_UMAX:
5509 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
5510 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
5511 return lowerATOMIC_CMP_SWAP(Op, DAG);
5512 case ISD::STACKSAVE:
5513 return lowerSTACKSAVE(Op, DAG);
5514 case ISD::STACKRESTORE:
5515 return lowerSTACKRESTORE(Op, DAG);
5516 case ISD::PREFETCH:
5517 return lowerPREFETCH(Op, DAG);
5518 case ISD::INTRINSIC_W_CHAIN:
5519 return lowerINTRINSIC_W_CHAIN(Op, DAG);
5520 case ISD::INTRINSIC_WO_CHAIN:
5521 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
5522 case ISD::BUILD_VECTOR:
5523 return lowerBUILD_VECTOR(Op, DAG);
5524 case ISD::VECTOR_SHUFFLE:
5525 return lowerVECTOR_SHUFFLE(Op, DAG);
5526 case ISD::SCALAR_TO_VECTOR:
5527 return lowerSCALAR_TO_VECTOR(Op, DAG);
5528 case ISD::INSERT_VECTOR_ELT:
5529 return lowerINSERT_VECTOR_ELT(Op, DAG);
5530 case ISD::EXTRACT_VECTOR_ELT:
5531 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
5532 case ISD::SIGN_EXTEND_VECTOR_INREG:
5533 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
5534 case ISD::ZERO_EXTEND_VECTOR_INREG:
5535 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
5536 case ISD::SHL:
5537 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
5538 case ISD::SRL:
5539 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
5540 case ISD::SRA:
5541 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
5542 default:
5543 llvm_unreachable("Unexpected node to lower");
5544 }
5545}
5546
5547// Lower operations with invalid operand or result types (currently used
5548// only for 128-bit integer types).
5549void
5550SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
5551 SmallVectorImpl<SDValue> &Results,
5552 SelectionDAG &DAG) const {
5553 switch (N->getOpcode()) {
5554 case ISD::ATOMIC_LOAD: {
5555 SDLoc DL(N);
5556 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
5557 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
5558 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5559 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
5560 DL, Tys, Ops, MVT::i128, MMO);
5561 Results.push_back(lowerGR128ToI128(DAG, Res));
5562 Results.push_back(Res.getValue(1));
5563 break;
5564 }
5565 case ISD::ATOMIC_STORE: {
5566 SDLoc DL(N);
5567 SDVTList Tys = DAG.getVTList(MVT::Other);
5568 SDValue Ops[] = { N->getOperand(0),
5569 lowerI128ToGR128(DAG, N->getOperand(2)),
5570 N->getOperand(1) };
5571 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5572 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
5573 DL, Tys, Ops, MVT::i128, MMO);
5574 // We have to enforce sequential consistency by performing a
5575 // serialization operation after the store.
5576 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
5577 AtomicOrdering::SequentiallyConsistent)
5578 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
5579 MVT::Other, Res), 0);
5580 Results.push_back(Res);
5581 break;
5582 }
5583 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
5584 SDLoc DL(N);
5585 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
5586 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
5587 lowerI128ToGR128(DAG, N->getOperand(2)),
5588 lowerI128ToGR128(DAG, N->getOperand(3)) };
5589 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5590 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
5591 DL, Tys, Ops, MVT::i128, MMO);
5592 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
5593 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
5594 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
5595 Results.push_back(lowerGR128ToI128(DAG, Res));
5596 Results.push_back(Success);
5597 Results.push_back(Res.getValue(2));
5598 break;
5599 }
5600 case ISD::BITCAST: {
5601 SDValue Src = N->getOperand(0);
5602 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
5603 !useSoftFloat()) {
5604 SDLoc DL(N);
5605 SDValue Lo, Hi;
5606 if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) {
5607 SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src);
5608 Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
5609 DAG.getConstant(1, DL, MVT::i32));
5610 Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
5611 DAG.getConstant(0, DL, MVT::i32));
5612 } else {
5613 assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass &&
5614 "Unrecognized register class for f128.");
5615 SDValue LoFP = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
5616 DL, MVT::f64, Src);
5617 SDValue HiFP = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
5618 DL, MVT::f64, Src);
5619 Lo = DAG.getNode(ISD::BITCAST, DL, MVT::i64, LoFP);
5620 Hi = DAG.getNode(ISD::BITCAST, DL, MVT::i64, HiFP);
5621 }
5622 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi));
5623 }
5624 break;
5625 }
5626 default:
5627 llvm_unreachable("Unexpected node to lower");
5628 }
5629}
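// The three atomic cases above share one pattern: the i128 value is
// carried through an untyped 128-bit register pair (lowerI128ToGR128 /
// lowerGR128ToI128) around a target memory-intrinsic node. For example,
// a 16-byte compare-and-swap becomes ATOMIC_CMP_SWAP_128, whose extra
// i32 result is the CC value that emitSETCC converts into the success
// flag expected by ATOMIC_CMP_SWAP_WITH_SUCCESS.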
5630
5631void
5632SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
5633 SmallVectorImpl<SDValue> &Results,
5634 SelectionDAG &DAG) const {
5635 return LowerOperationWrapper(N, Results, DAG);
5636}
5637
5638const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
5639#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
5640 switch ((SystemZISD::NodeType)Opcode) {
5641 case SystemZISD::FIRST_NUMBER: break;
5642 OPCODE(RET_FLAG);
5643 OPCODE(CALL);
5644 OPCODE(SIBCALL);
5645 OPCODE(TLS_GDCALL);
5646 OPCODE(TLS_LDCALL);
5647 OPCODE(PCREL_WRAPPER);
5648 OPCODE(PCREL_OFFSET);
5649 OPCODE(ICMP);
5650 OPCODE(FCMP);
5651 OPCODE(STRICT_FCMP);
5652 OPCODE(STRICT_FCMPS);
5653 OPCODE(TM);
5654 OPCODE(BR_CCMASK);
5655 OPCODE(SELECT_CCMASK);
5656 OPCODE(ADJDYNALLOC);
5657 OPCODE(PROBED_ALLOCA);
5658 OPCODE(POPCNT);
5659 OPCODE(SMUL_LOHI);
5660 OPCODE(UMUL_LOHI);
5661 OPCODE(SDIVREM);
5662 OPCODE(UDIVREM);
5663 OPCODE(SADDO);
5664 OPCODE(SSUBO);
5665 OPCODE(UADDO);
5666 OPCODE(USUBO);
5667 OPCODE(ADDCARRY);
5668 OPCODE(SUBCARRY);
5669 OPCODE(GET_CCMASK);
5670 OPCODE(MVC);
5671 OPCODE(MVC_LOOP);
5672 OPCODE(NC);
5673 OPCODE(NC_LOOP);
5674 OPCODE(OC);
5675 OPCODE(OC_LOOP);
5676 OPCODE(XC);
5677 OPCODE(XC_LOOP);
5678 OPCODE(CLC);
5679 OPCODE(CLC_LOOP);
5680 OPCODE(STPCPY);
5681 OPCODE(STRCMP);
5682 OPCODE(SEARCH_STRING);
5683 OPCODE(IPM);
5684 OPCODE(MEMBARRIER);
5685 OPCODE(TBEGIN);
5686 OPCODE(TBEGIN_NOFLOAT);
5687 OPCODE(TEND);
5688 OPCODE(BYTE_MASK);
5689 OPCODE(ROTATE_MASK);
5690 OPCODE(REPLICATE);
5691 OPCODE(JOIN_DWORDS);
5692 OPCODE(SPLAT);
5693 OPCODE(MERGE_HIGH);
5694 OPCODE(MERGE_LOW);
5695 OPCODE(SHL_DOUBLE);
5696 OPCODE(PERMUTE_DWORDS);
5697 OPCODE(PERMUTE);
5698 OPCODE(PACK);
5699 OPCODE(PACKS_CC);
5700 OPCODE(PACKLS_CC);
5701 OPCODE(UNPACK_HIGH);
5702 OPCODE(UNPACKL_HIGH);
5703 OPCODE(UNPACK_LOW);
5704 OPCODE(UNPACKL_LOW);
5705 OPCODE(VSHL_BY_SCALAR);
5706 OPCODE(VSRL_BY_SCALAR);
5707 OPCODE(VSRA_BY_SCALAR);
5708 OPCODE(VSUM);
5709 OPCODE(VICMPE);
5710 OPCODE(VICMPH);
5711 OPCODE(VICMPHL);
5712 OPCODE(VICMPES);
5713 OPCODE(VICMPHS);
5714 OPCODE(VICMPHLS);
5715 OPCODE(VFCMPE);
5716 OPCODE(STRICT_VFCMPE);
5717 OPCODE(STRICT_VFCMPES);
5718 OPCODE(VFCMPH);
5719 OPCODE(STRICT_VFCMPH);
5720 OPCODE(STRICT_VFCMPHS);
5721 OPCODE(VFCMPHE);
5722 OPCODE(STRICT_VFCMPHE);
5723 OPCODE(STRICT_VFCMPHES);
5724 OPCODE(VFCMPES);
5725 OPCODE(VFCMPHS);
5726 OPCODE(VFCMPHES);
5727 OPCODE(VFTCI);
5728 OPCODE(VEXTEND);
5729 OPCODE(STRICT_VEXTEND);
5730 OPCODE(VROUND);
5731 OPCODE(STRICT_VROUND);
5732 OPCODE(VTM);
5733 OPCODE(VFAE_CC);
5734 OPCODE(VFAEZ_CC);
5735 OPCODE(VFEE_CC);
5736 OPCODE(VFEEZ_CC);
5737 OPCODE(VFENE_CC);
5738 OPCODE(VFENEZ_CC);
5739 OPCODE(VISTR_CC);
5740 OPCODE(VSTRC_CC);
5741 OPCODE(VSTRCZ_CC);
5742 OPCODE(VSTRS_CC);
5743 OPCODE(VSTRSZ_CC);
5744 OPCODE(TDC);
5745 OPCODE(ATOMIC_SWAPW);
5746 OPCODE(ATOMIC_LOADW_ADD);
5747 OPCODE(ATOMIC_LOADW_SUB);
5748 OPCODE(ATOMIC_LOADW_AND);
5749 OPCODE(ATOMIC_LOADW_OR);
5750 OPCODE(ATOMIC_LOADW_XOR);
5751 OPCODE(ATOMIC_LOADW_NAND);
5752 OPCODE(ATOMIC_LOADW_MIN);
5753 OPCODE(ATOMIC_LOADW_MAX);
5754 OPCODE(ATOMIC_LOADW_UMIN);
5755 OPCODE(ATOMIC_LOADW_UMAX);
5756 OPCODE(ATOMIC_CMP_SWAPW);
5757 OPCODE(ATOMIC_CMP_SWAP);
5758 OPCODE(ATOMIC_LOAD_128);
5759 OPCODE(ATOMIC_STORE_128);
5760 OPCODE(ATOMIC_CMP_SWAP_128);
5761 OPCODE(LRV);
5762 OPCODE(STRV);
5763 OPCODE(VLER);
5764 OPCODE(VSTER);
5765 OPCODE(PREFETCH);
5766 }
5767 return nullptr;
5768#undef OPCODE
5769}
5770
5771// Return true if VT is a vector whose elements are a whole number of bytes
5772// in width. Also check for presence of vector support.
5773bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
5774 if (!Subtarget.hasVector())
5775 return false;
5776
5777 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
5778}
5779
5780// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
5781// producing a result of type ResVT. Op is a possibly bitcast version
5782// of the input vector and Index is the index (based on type VecVT) that
5783// should be extracted. Return the new extraction if a simplification
5784// was possible or if Force is true.
5785SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
5786 EVT VecVT, SDValue Op,
5787 unsigned Index,
5788 DAGCombinerInfo &DCI,
5789 bool Force) const {
5790 SelectionDAG &DAG = DCI.DAG;
5791
5792 // The number of bytes being extracted.
5793 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
5794
5795 for (;;) {
5796 unsigned Opcode = Op.getOpcode();
5797 if (Opcode == ISD::BITCAST)
5798 // Look through bitcasts.
5799 Op = Op.getOperand(0);
5800 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
5801 canTreatAsByteVector(Op.getValueType())) {
5802 // Get a VPERM-like permute mask and see whether the bytes covered
5803 // by the extracted element are a contiguous sequence from one
5804 // source operand.
5805 SmallVector<int, SystemZ::VectorBytes> Bytes;
5806 if (!getVPermMask(Op, Bytes))
5807 break;
5808 int First;
5809 if (!getShuffleInput(Bytes, Index * BytesPerElement,
5810 BytesPerElement, First))
5811 break;
5812 if (First < 0)
5813 return DAG.getUNDEF(ResVT);
5814 // Make sure the contiguous sequence starts at a multiple of the
5815 // original element size.
5816 unsigned Byte = unsigned(First) % Bytes.size();
5817 if (Byte % BytesPerElement != 0)
5818 break;
5819 // We can get the extracted value directly from an input.
5820 Index = Byte / BytesPerElement;
5821 Op = Op.getOperand(unsigned(First) / Bytes.size());
5822 Force = true;
5823 } else if (Opcode == ISD::BUILD_VECTOR &&
5824 canTreatAsByteVector(Op.getValueType())) {
5825 // We can only optimize this case if the BUILD_VECTOR elements are
5826 // at least as wide as the extracted value.
5827 EVT OpVT = Op.getValueType();
5828 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
5829 if (OpBytesPerElement < BytesPerElement)
5830 break;
5831 // Make sure that the least-significant bit of the extracted value
5832 // is the least-significant bit of an input.
5833 unsigned End = (Index + 1) * BytesPerElement;
5834 if (End % OpBytesPerElement != 0)
5835 break;
5836 // We're extracting the low part of one operand of the BUILD_VECTOR.
5837 Op = Op.getOperand(End / OpBytesPerElement - 1);
5838 if (!Op.getValueType().isInteger()) {
5839 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
5840 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
5841 DCI.AddToWorklist(Op.getNode());
5842 }
5843 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
5844 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
5845 if (VT != ResVT) {
5846 DCI.AddToWorklist(Op.getNode());
5847 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
5848 }
5849 return Op;
5850 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
5851 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
5852 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5853 canTreatAsByteVector(Op.getValueType()) &&
5854 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
5855 // Make sure that only the unextended bits are significant.
5856 EVT ExtVT = Op.getValueType();
5857 EVT OpVT = Op.getOperand(0).getValueType();
5858 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
5859 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
5860 unsigned Byte = Index * BytesPerElement;
5861 unsigned SubByte = Byte % ExtBytesPerElement;
5862 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
5863 if (SubByte < MinSubByte ||
5864 SubByte + BytesPerElement > ExtBytesPerElement)
5865 break;
5866 // Get the byte offset of the unextended element...
5867 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
5868 // ...then add the byte offset relative to that element.
5869 Byte += SubByte - MinSubByte;
5870 if (Byte % BytesPerElement != 0)
5871 break;
5872 Op = Op.getOperand(0);
5873 Index = Byte / BytesPerElement;
5874 Force = true;
5875 } else
5876 break;
5877 }
5878 if (Force) {
5879 if (Op.getValueType() != VecVT) {
5880 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
5881 DCI.AddToWorklist(Op.getNode());
5882 }
5883 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
5884 DAG.getConstant(Index, DL, MVT::i32));
5885 }
5886 return SDValue();
5887}
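// A worked example of the extend-in-reg arithmetic above: extracting
// byte 7 from a v8i16 that is (zero_extend_vector_inreg v16i8) gives
// SubByte = 7 % 2 = 1 and MinSubByte = 2 - 1 = 1, so the byte lies in
// the unextended data; then Byte = 7 / 2 * 1 + (1 - 1) = 3 and the
// extraction is retargeted to element 3 of the original v16i8.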
5888
5889// Optimize vector operations in scalar value Op on the basis that Op
5890// is truncated to TruncVT.
5891SDValue SystemZTargetLowering::combineTruncateExtract(
5892 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
5893 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
5894 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
5895 // of type TruncVT.
5896 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5897 TruncVT.getSizeInBits() % 8 == 0) {
5898 SDValue Vec = Op.getOperand(0);
5899 EVT VecVT = Vec.getValueType();
5900 if (canTreatAsByteVector(VecVT)) {
5901 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
5902 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
5903 unsigned TruncBytes = TruncVT.getStoreSize();
5904 if (BytesPerElement % TruncBytes == 0) {
5905 // Calculate the value of Y' in the above description. We are
5906 // splitting the original elements into Scale equal-sized pieces
5907 // and for truncation purposes want the last (least-significant)
5908 // of these pieces for IndexN. This is easiest to do by calculating
5909 // the start index of the following element and then subtracting 1.
5910 unsigned Scale = BytesPerElement / TruncBytes;
5911 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
5912
5913 // Defer the creation of the bitcast from X to combineExtract,
5914 // which might be able to optimize the extraction.
5915 VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
5916 VecVT.getStoreSize() / TruncBytes);
5917 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
5918 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
5919 }
5920 }
5921 }
5922 }
5923 return SDValue();
5924}
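// A worked example of the index arithmetic above: for
// (trunc i8 (extract_vector_elt v4i32 X, 1)), BytesPerElement = 4 and
// TruncBytes = 1, so Scale = 4 and NewIndex = (1 + 1) * 4 - 1 = 7:
// byte 7 of X viewed as v16i8 is the least-significant byte of element
// 1, and combineExtract redoes the extraction there (with an i32
// result, since TruncBytes < 4).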
5925
5926SDValue SystemZTargetLowering::combineZERO_EXTEND(
5927 SDNode *N, DAGCombinerInfo &DCI) const {
5928 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
5929 SelectionDAG &DAG = DCI.DAG;
5930 SDValue N0 = N->getOperand(0);
5931 EVT VT = N->getValueType(0);
5932 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
5933 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
5934 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5935 if (TrueOp && FalseOp) {
5936 SDLoc DL(N0);
5937 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
5938 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
5939 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
5940 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
5941 // If N0 has multiple uses, change other uses as well.
5942 if (!N0.hasOneUse()) {
5943 SDValue TruncSelect =
5944 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
5945 DCI.CombineTo(N0.getNode(), TruncSelect);
5946 }
5947 return NewSelect;
5948 }
5949 }
5950 return SDValue();
5951}
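// For example: (zext i64 (select_ccmask i32 1, 0, CCValid, CCMask, CC))
// is rebuilt as an i64 SELECT_CCMASK of the widened constants; any
// other users of the original i32 select are redirected to a TRUNCATE
// of the new node, so only one select on the CC value remains.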
5952
5953SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
5954 SDNode *N, DAGCombinerInfo &DCI) const {
5955 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
5956 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
5957 // into (select_cc LHS, RHS, -1, 0, COND)
5958 SelectionDAG &DAG = DCI.DAG;
5959 SDValue N0 = N->getOperand(0);
5960 EVT VT = N->getValueType(0);
5961 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
5962 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
5963 N0 = N0.getOperand(0);
5964 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
5965 SDLoc DL(N0);
5966 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
5967 DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
5968 N0.getOperand(2) };
5969 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
5970 }
5971 return SDValue();
5972}
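// For example: (sext_in_reg (setcc %a, %b, cc), i1) becomes
// (select_cc %a, %b, -1, 0, cc): rather than materializing an i1 and
// sign-extending it, the comparison selects the all-ones or all-zero
// value directly.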
5973
5974SDValue SystemZTargetLowering::combineSIGN_EXTEND(
5975 SDNode *N, DAGCombinerInfo &DCI) const {
5976 // Convert (sext (ashr (shl X, C1), C2)) to
5977 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
5978 // cheap as narrower ones.
5979 SelectionDAG &DAG = DCI.DAG;
5980 SDValue N0 = N->getOperand(0);
5981 EVT VT = N->getValueType(0);
5982 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {