Bug Summary

File: /build/source/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
Warning: line 785, column 36
The result of the left shift is undefined due to shifting by '18446744073709551615', which is greater or equal to the width of type 'uint64_t'
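The shift amount quoted above, 18446744073709551615, is UINT64_MAX: it is the value 'findFirstSet' reports for a zero input (see steps 12-20 in the annotated path below). On a path where 'SplatBitsZ' is zero, the expression 'uint64_t(1) << findFirstSet(SplatBitsZ)' on line 785 therefore shifts a 64-bit value by an amount >= 64, which C++ leaves undefined. The following is a minimal, self-contained sketch of that mechanism; 'findFirstSetMimic' is a simplified stand-in written for this report, not the LLVM implementation.

#include <cstdint>
#include <iostream>
#include <limits>

// Simplified stand-in for the lowest-set-bit helper: returns the bit index
// of the lowest set bit, or the maximum uint64_t value when the input is 0
// (which matches the shift amount quoted in the warning).
static uint64_t findFirstSetMimic(uint64_t V) {
  if (V == 0)
    return std::numeric_limits<uint64_t>::max();
  uint64_t Index = 0;
  while ((V & 1) == 0) {
    V >>= 1;
    ++Index;
  }
  return Index;
}

int main() {
  uint64_t SplatBitsZ = 0; // the value assumed on the reported path
  uint64_t Shift = findFirstSetMimic(SplatBitsZ);
  std::cout << "shift amount: " << Shift << "\n"; // prints 18446744073709551615
  // Evaluating uint64_t(1) << Shift here, as line 785 does, would shift a
  // 64-bit value by >= 64 bits, which is undefined behaviour in C++.
  return 0;
}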

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name SystemZISelLowering.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16 -I lib/Target/SystemZ -I /build/source/llvm/lib/Target/SystemZ -I include -I /build/source/llvm/include -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1674602410 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-01-25-024556-16494-1 -x c++ /build/source/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

/build/source/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
15#include "SystemZConstantPoolValue.h"
16#include "SystemZMachineFunctionInfo.h"
17#include "SystemZTargetMachine.h"
18#include "llvm/CodeGen/CallingConvLower.h"
19#include "llvm/CodeGen/MachineInstrBuilder.h"
20#include "llvm/CodeGen/MachineRegisterInfo.h"
21#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
22#include "llvm/IR/IntrinsicInst.h"
23#include "llvm/IR/Intrinsics.h"
24#include "llvm/IR/IntrinsicsS390.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/KnownBits.h"
27#include <cctype>
28#include <optional>
29
30using namespace llvm;
31
 32#define DEBUG_TYPE "systemz-lower"
33
34namespace {
35// Represents information about a comparison.
36struct Comparison {
37 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
38 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
39 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
40
41 // The operands to the comparison.
42 SDValue Op0, Op1;
43
44 // Chain if this is a strict floating-point comparison.
45 SDValue Chain;
46
47 // The opcode that should be used to compare Op0 and Op1.
48 unsigned Opcode;
49
50 // A SystemZICMP value. Only used for integer comparisons.
51 unsigned ICmpType;
52
53 // The mask of CC values that Opcode can produce.
54 unsigned CCValid;
55
56 // The mask of CC values for which the original condition is true.
57 unsigned CCMask;
58};
59} // end anonymous namespace
60
61// Classify VT as either 32 or 64 bit.
62static bool is32Bit(EVT VT) {
63 switch (VT.getSimpleVT().SimpleTy) {
64 case MVT::i32:
65 return true;
66 case MVT::i64:
67 return false;
68 default:
 69 llvm_unreachable("Unsupported type");
70 }
71}
72
73// Return a version of MachineOperand that can be safely used before the
74// final use.
75static MachineOperand earlyUseOperand(MachineOperand Op) {
76 if (Op.isReg())
77 Op.setIsKill(false);
78 return Op;
79}
80
81SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
82 const SystemZSubtarget &STI)
83 : TargetLowering(TM), Subtarget(STI) {
84 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
85
86 auto *Regs = STI.getSpecialRegisters();
87
88 // Set up the register classes.
89 if (Subtarget.hasHighWord())
90 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
91 else
92 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
93 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
94 if (!useSoftFloat()) {
95 if (Subtarget.hasVector()) {
96 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
97 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
98 } else {
99 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
100 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
101 }
102 if (Subtarget.hasVectorEnhancements1())
103 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
104 else
105 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
106
107 if (Subtarget.hasVector()) {
108 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
109 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
110 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
111 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
112 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
113 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
114 }
115 }
116
117 // Compute derived properties from the register classes
118 computeRegisterProperties(Subtarget.getRegisterInfo());
119
120 // Set up special registers.
121 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
122
123 // TODO: It may be better to default to latency-oriented scheduling, however
124 // LLVM's current latency-oriented scheduler can't handle physreg definitions
125 // such as SystemZ has with CC, so set this to the register-pressure
126 // scheduler, because it can.
127 setSchedulingPreference(Sched::RegPressure);
128
129 setBooleanContents(ZeroOrOneBooleanContent);
130 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
131
132 // Instructions are strings of 2-byte aligned 2-byte values.
133 setMinFunctionAlignment(Align(2));
134 // For performance reasons we prefer 16-byte alignment.
135 setPrefFunctionAlignment(Align(16));
136
137 // Handle operations that are handled in a similar way for all types.
138 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
139 I <= MVT::LAST_FP_VALUETYPE;
140 ++I) {
141 MVT VT = MVT::SimpleValueType(I);
142 if (isTypeLegal(VT)) {
143 // Lower SET_CC into an IPM-based sequence.
144 setOperationAction(ISD::SETCC, VT, Custom);
145 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
146 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
147
148 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
149 setOperationAction(ISD::SELECT, VT, Expand);
150
151 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
152 setOperationAction(ISD::SELECT_CC, VT, Custom);
153 setOperationAction(ISD::BR_CC, VT, Custom);
154 }
155 }
156
157 // Expand jump table branches as address arithmetic followed by an
158 // indirect jump.
159 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
160
161 // Expand BRCOND into a BR_CC (see above).
162 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
163
164 // Handle integer types.
165 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
166 I <= MVT::LAST_INTEGER_VALUETYPE;
167 ++I) {
168 MVT VT = MVT::SimpleValueType(I);
169 if (isTypeLegal(VT)) {
170 setOperationAction(ISD::ABS, VT, Legal);
171
172 // Expand individual DIV and REMs into DIVREMs.
173 setOperationAction(ISD::SDIV, VT, Expand);
174 setOperationAction(ISD::UDIV, VT, Expand);
175 setOperationAction(ISD::SREM, VT, Expand);
176 setOperationAction(ISD::UREM, VT, Expand);
177 setOperationAction(ISD::SDIVREM, VT, Custom);
178 setOperationAction(ISD::UDIVREM, VT, Custom);
179
180 // Support addition/subtraction with overflow.
181 setOperationAction(ISD::SADDO, VT, Custom);
182 setOperationAction(ISD::SSUBO, VT, Custom);
183
184 // Support addition/subtraction with carry.
185 setOperationAction(ISD::UADDO, VT, Custom);
186 setOperationAction(ISD::USUBO, VT, Custom);
187
188 // Support carry in as value rather than glue.
189 setOperationAction(ISD::ADDCARRY, VT, Custom);
190 setOperationAction(ISD::SUBCARRY, VT, Custom);
191
192 // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
193 // stores, putting a serialization instruction after the stores.
194 setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
195 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
196
197 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
198 // available, or if the operand is constant.
199 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
200
201 // Use POPCNT on z196 and above.
202 if (Subtarget.hasPopulationCount())
203 setOperationAction(ISD::CTPOP, VT, Custom);
204 else
205 setOperationAction(ISD::CTPOP, VT, Expand);
206
207 // No special instructions for these.
208 setOperationAction(ISD::CTTZ, VT, Expand);
209 setOperationAction(ISD::ROTR, VT, Expand);
210
211 // Use *MUL_LOHI where possible instead of MULH*.
212 setOperationAction(ISD::MULHS, VT, Expand);
213 setOperationAction(ISD::MULHU, VT, Expand);
214 setOperationAction(ISD::SMUL_LOHI, VT, Custom);
215 setOperationAction(ISD::UMUL_LOHI, VT, Custom);
216
217 // Only z196 and above have native support for conversions to unsigned.
218 // On z10, promoting to i64 doesn't generate an inexact condition for
219 // values that are outside the i32 range but in the i64 range, so use
220 // the default expansion.
221 if (!Subtarget.hasFPExtension())
222 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
223
224 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
225 // default to Expand, so need to be modified to Legal where appropriate.
226 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
227 if (Subtarget.hasFPExtension())
228 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);
229
230 // And similarly for STRICT_[SU]INT_TO_FP.
231 setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal);
232 if (Subtarget.hasFPExtension())
233 setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal);
234 }
235 }
236
237 // Type legalization will convert 8- and 16-bit atomic operations into
238 // forms that operate on i32s (but still keeping the original memory VT).
239 // Lower them into full i32 operations.
240 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
241 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
242 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
243 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
244 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
245 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
246 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
247 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
248 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
249 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
250 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
251
252 // Even though i128 is not a legal type, we still need to custom lower
253 // the atomic operations in order to exploit SystemZ instructions.
254 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
255 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
256
257 // We can use the CC result of compare-and-swap to implement
258 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
259 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
260 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
261 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
262
263 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
264
265 // Traps are legal, as we will convert them to "j .+2".
266 setOperationAction(ISD::TRAP, MVT::Other, Legal);
267
268 // z10 has instructions for signed but not unsigned FP conversion.
269 // Handle unsigned 32-bit types as signed 64-bit types.
270 if (!Subtarget.hasFPExtension()) {
271 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
272 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
273 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote);
274 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
275 }
276
277 // We have native support for a 64-bit CTLZ, via FLOGR.
278 setOperationAction(ISD::CTLZ, MVT::i32, Promote);
279 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
280 setOperationAction(ISD::CTLZ, MVT::i64, Legal);
281
282 // On z15 we have native support for a 64-bit CTPOP.
283 if (Subtarget.hasMiscellaneousExtensions3()) {
284 setOperationAction(ISD::CTPOP, MVT::i32, Promote);
285 setOperationAction(ISD::CTPOP, MVT::i64, Legal);
286 }
287
288 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
289 setOperationAction(ISD::OR, MVT::i64, Custom);
290
291 // Expand 128 bit shifts without using a libcall.
292 setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
293 setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
294 setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
295 setLibcallName(RTLIB::SRL_I128, nullptr);
296 setLibcallName(RTLIB::SHL_I128, nullptr);
297 setLibcallName(RTLIB::SRA_I128, nullptr);
298
299 // Handle bitcast from fp128 to i128.
300 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
301
302 // We have native instructions for i8, i16 and i32 extensions, but not i1.
303 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
304 for (MVT VT : MVT::integer_valuetypes()) {
305 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
306 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
307 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
308 }
309
310 // Handle the various types of symbolic address.
311 setOperationAction(ISD::ConstantPool, PtrVT, Custom);
312 setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
313 setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
314 setOperationAction(ISD::BlockAddress, PtrVT, Custom);
315 setOperationAction(ISD::JumpTable, PtrVT, Custom);
316
317 // We need to handle dynamic allocations specially because of the
318 // 160-byte area at the bottom of the stack.
319 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
320 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
321
322 setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
323 setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
324
325 // Handle prefetches with PFD or PFDRL.
326 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
327
328 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
329 // Assume by default that all vector operations need to be expanded.
330 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
331 if (getOperationAction(Opcode, VT) == Legal)
332 setOperationAction(Opcode, VT, Expand);
333
334 // Likewise all truncating stores and extending loads.
335 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
336 setTruncStoreAction(VT, InnerVT, Expand);
337 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
338 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
339 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
340 }
341
342 if (isTypeLegal(VT)) {
343 // These operations are legal for anything that can be stored in a
344 // vector register, even if there is no native support for the format
345 // as such. In particular, we can do these for v4f32 even though there
346 // are no specific instructions for that format.
347 setOperationAction(ISD::LOAD, VT, Legal);
348 setOperationAction(ISD::STORE, VT, Legal);
349 setOperationAction(ISD::VSELECT, VT, Legal);
350 setOperationAction(ISD::BITCAST, VT, Legal);
351 setOperationAction(ISD::UNDEF, VT, Legal);
352
353 // Likewise, except that we need to replace the nodes with something
354 // more specific.
355 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
356 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
357 }
358 }
359
360 // Handle integer vector types.
361 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
362 if (isTypeLegal(VT)) {
363 // These operations have direct equivalents.
364 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
365 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
366 setOperationAction(ISD::ADD, VT, Legal);
367 setOperationAction(ISD::SUB, VT, Legal);
368 if (VT != MVT::v2i64)
369 setOperationAction(ISD::MUL, VT, Legal);
370 setOperationAction(ISD::ABS, VT, Legal);
371 setOperationAction(ISD::AND, VT, Legal);
372 setOperationAction(ISD::OR, VT, Legal);
373 setOperationAction(ISD::XOR, VT, Legal);
374 if (Subtarget.hasVectorEnhancements1())
375 setOperationAction(ISD::CTPOP, VT, Legal);
376 else
377 setOperationAction(ISD::CTPOP, VT, Custom);
378 setOperationAction(ISD::CTTZ, VT, Legal);
379 setOperationAction(ISD::CTLZ, VT, Legal);
380
381 // Convert a GPR scalar to a vector by inserting it into element 0.
382 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
383
384 // Use a series of unpacks for extensions.
385 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
386 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
387
388 // Detect shifts by a scalar amount and convert them into
389 // V*_BY_SCALAR.
390 setOperationAction(ISD::SHL, VT, Custom);
391 setOperationAction(ISD::SRA, VT, Custom);
392 setOperationAction(ISD::SRL, VT, Custom);
393
394 // At present ROTL isn't matched by DAGCombiner. ROTR should be
395 // converted into ROTL.
396 setOperationAction(ISD::ROTL, VT, Expand);
397 setOperationAction(ISD::ROTR, VT, Expand);
398
399 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
400 // and inverting the result as necessary.
401 setOperationAction(ISD::SETCC, VT, Custom);
402 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
403 if (Subtarget.hasVectorEnhancements1())
404 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
405 }
406 }
407
408 if (Subtarget.hasVector()) {
409 // There should be no need to check for float types other than v2f64
410 // since <2 x f32> isn't a legal type.
411 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
412 setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
413 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
414 setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
415 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
416 setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
417 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
418 setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
419
420 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
421 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
422 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
423 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
424 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
425 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal);
426 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
427 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal);
428 }
429
430 if (Subtarget.hasVectorEnhancements2()) {
431 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
432 setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
433 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
434 setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
435 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
436 setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
437 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
438 setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);
439
440 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
441 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
442 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
443 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
444 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
445 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal);
446 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
447 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal);
448 }
449
450 // Handle floating-point types.
451 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
452 I <= MVT::LAST_FP_VALUETYPE;
453 ++I) {
454 MVT VT = MVT::SimpleValueType(I);
455 if (isTypeLegal(VT)) {
456 // We can use FI for FRINT.
457 setOperationAction(ISD::FRINT, VT, Legal);
458
459 // We can use the extended form of FI for other rounding operations.
460 if (Subtarget.hasFPExtension()) {
461 setOperationAction(ISD::FNEARBYINT, VT, Legal);
462 setOperationAction(ISD::FFLOOR, VT, Legal);
463 setOperationAction(ISD::FCEIL, VT, Legal);
464 setOperationAction(ISD::FTRUNC, VT, Legal);
465 setOperationAction(ISD::FROUND, VT, Legal);
466 }
467
468 // No special instructions for these.
469 setOperationAction(ISD::FSIN, VT, Expand);
470 setOperationAction(ISD::FCOS, VT, Expand);
471 setOperationAction(ISD::FSINCOS, VT, Expand);
472 setOperationAction(ISD::FREM, VT, Expand);
473 setOperationAction(ISD::FPOW, VT, Expand);
474
475 // Special treatment.
476 setOperationAction(ISD::IS_FPCLASS, VT, Custom);
477
478 // Handle constrained floating-point operations.
479 setOperationAction(ISD::STRICT_FADD, VT, Legal);
480 setOperationAction(ISD::STRICT_FSUB, VT, Legal);
481 setOperationAction(ISD::STRICT_FMUL, VT, Legal);
482 setOperationAction(ISD::STRICT_FDIV, VT, Legal);
483 setOperationAction(ISD::STRICT_FMA, VT, Legal);
484 setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
485 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
486 setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
487 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
488 if (Subtarget.hasFPExtension()) {
489 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
490 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
491 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
492 setOperationAction(ISD::STRICT_FROUND, VT, Legal);
493 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
494 }
495 }
496 }
497
498 // Handle floating-point vector types.
499 if (Subtarget.hasVector()) {
500 // Scalar-to-vector conversion is just a subreg.
501 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
502 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
503
504 // Some insertions and extractions can be done directly but others
505 // need to go via integers.
506 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
507 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
508 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
509 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
510
511 // These operations have direct equivalents.
512 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
513 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
514 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
515 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
516 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
517 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
518 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
519 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
520 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
521 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
522 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
523 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
524 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
525 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
526
527 // Handle constrained floating-point operations.
528 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
529 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
530 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
531 setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
532 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
533 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
534 setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
535 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
536 setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
537 setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
538 setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
539 setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
540 }
541
542 // The vector enhancements facility 1 has instructions for these.
543 if (Subtarget.hasVectorEnhancements1()) {
544 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
545 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
546 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
547 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
548 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
549 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
550 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
551 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
552 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
553 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
554 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
555 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
556 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
557 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
558
559 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
560 setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
561 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
562 setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);
563
564 setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
565 setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
566 setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
567 setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);
568
569 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
570 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
571 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
572 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
573
574 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
575 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
576 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
577 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
578
579 setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
580 setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
581 setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
582 setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
583
584 // Handle constrained floating-point operations.
585 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
586 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
587 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
588 setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
589 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
590 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
591 setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
592 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
593 setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
594 setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
595 setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
596 setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
597 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
598 MVT::v4f32, MVT::v2f64 }) {
599 setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
600 setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
601 setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal);
602 setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal);
603 }
604 }
605
606 // We only have fused f128 multiply-addition on vector registers.
607 if (!Subtarget.hasVectorEnhancements1()) {
608 setOperationAction(ISD::FMA, MVT::f128, Expand);
609 setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
610 }
611
612 // We don't have a copysign instruction on vector registers.
613 if (Subtarget.hasVectorEnhancements1())
614 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
615
616 // Needed so that we don't try to implement f128 constant loads using
617 // a load-and-extend of a f80 constant (in cases where the constant
618 // would fit in an f80).
619 for (MVT VT : MVT::fp_valuetypes())
620 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
621
622 // We don't have extending load instruction on vector registers.
623 if (Subtarget.hasVectorEnhancements1()) {
624 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
625 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
626 }
627
628 // Floating-point truncation and stores need to be done separately.
629 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
630 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
631 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
632
633 // We have 64-bit FPR<->GPR moves, but need special handling for
634 // 32-bit forms.
635 if (!Subtarget.hasVector()) {
636 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
637 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
638 }
639
640 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
641 // structure, but VAEND is a no-op.
642 setOperationAction(ISD::VASTART, MVT::Other, Custom);
643 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
644 setOperationAction(ISD::VAEND, MVT::Other, Expand);
645
646 setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
647
648 // Codes for which we want to perform some z-specific combinations.
649 setTargetDAGCombine({ISD::ZERO_EXTEND,
650 ISD::SIGN_EXTEND,
651 ISD::SIGN_EXTEND_INREG,
652 ISD::LOAD,
653 ISD::STORE,
654 ISD::VECTOR_SHUFFLE,
655 ISD::EXTRACT_VECTOR_ELT,
656 ISD::FP_ROUND,
657 ISD::STRICT_FP_ROUND,
658 ISD::FP_EXTEND,
659 ISD::SINT_TO_FP,
660 ISD::UINT_TO_FP,
661 ISD::STRICT_FP_EXTEND,
662 ISD::BSWAP,
663 ISD::SDIV,
664 ISD::UDIV,
665 ISD::SREM,
666 ISD::UREM,
667 ISD::INTRINSIC_VOID,
668 ISD::INTRINSIC_W_CHAIN});
669
670 // Handle intrinsics.
671 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
672 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
673
674 // We want to use MVC in preference to even a single load/store pair.
675 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
676 MaxStoresPerMemcpyOptSize = 0;
677
678 // The main memset sequence is a byte store followed by an MVC.
679 // Two STC or MV..I stores win over that, but the kind of fused stores
680 // generated by target-independent code don't when the byte value is
681 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
682 // than "STC;MVC". Handle the choice in target-specific code instead.
683 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
684 MaxStoresPerMemsetOptSize = 0;
685
686 // Default to having -disable-strictnode-mutation on
687 IsStrictFPEnabled = true;
688}
689
690bool SystemZTargetLowering::useSoftFloat() const {
691 return Subtarget.hasSoftFloat();
692}
693
694EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
695 LLVMContext &, EVT VT) const {
696 if (!VT.isVector())
697 return MVT::i32;
698 return VT.changeVectorElementTypeToInteger();
699}
700
701bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
702 const MachineFunction &MF, EVT VT) const {
703 VT = VT.getScalarType();
704
705 if (!VT.isSimple())
706 return false;
707
708 switch (VT.getSimpleVT().SimpleTy) {
709 case MVT::f32:
710 case MVT::f64:
711 return true;
712 case MVT::f128:
713 return Subtarget.hasVectorEnhancements1();
714 default:
715 break;
716 }
717
718 return false;
719}
720
721// Return true if the constant can be generated with a vector instruction,
722// such as VGM, VGMB or VREPI.
723bool SystemZVectorConstantInfo::isVectorConstantLegal(
724 const SystemZSubtarget &Subtarget) {
725 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
726 if (!Subtarget.hasVector() ||
3. Assuming the condition is false
 727 (isFP128 && !Subtarget.hasVectorEnhancements1()))
3.1. Field 'isFP128' is false
728 return false;
729
730 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
731 // preferred way of creating all-zero and all-one vectors so give it
732 // priority over other methods below.
733 unsigned Mask = 0;
734 unsigned I = 0;
735 for (; I < SystemZ::VectorBytes; ++I) {
4. Loop condition is true. Entering loop body
736 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
737 if (Byte == 0xff)
5. Assuming 'Byte' is not equal to 255
6. Taking false branch
738 Mask |= 1ULL << I;
739 else if (Byte != 0)
7. Assuming 'Byte' is not equal to 0
8. Taking true branch
740 break;
741 }
 742 if (I == SystemZ::VectorBytes) {
9. Execution continues on line 742
9.1. 'I' is not equal to 'VectorBytes'
10. Taking false branch
743 Opcode = SystemZISD::BYTE_MASK;
744 OpVals.push_back(Mask);
745 VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
746 return true;
747 }
748
 749 if (SplatBitSize > 64)
10.1. Field 'SplatBitSize' is <= 64
11. Taking false branch
750 return false;
751
752 auto tryValue = [&](uint64_t Value) -> bool {
753 // Try VECTOR REPLICATE IMMEDIATE
754 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
755 if (isInt<16>(SignedValue)) {
756 OpVals.push_back(((unsigned) SignedValue));
757 Opcode = SystemZISD::REPLICATE;
758 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
759 SystemZ::VectorBits / SplatBitSize);
760 return true;
761 }
762 // Try VECTOR GENERATE MASK
763 unsigned Start, End;
764 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
765 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
766 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
767 // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
768 OpVals.push_back(Start - (64 - SplatBitSize));
769 OpVals.push_back(End - (64 - SplatBitSize));
770 Opcode = SystemZISD::ROTATE_MASK;
771 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
772 SystemZ::VectorBits / SplatBitSize);
773 return true;
774 }
775 return false;
776 };
777
778 // First try assuming that any undefined bits above the highest set bit
779 // and below the lowest set bit are 1s. This increases the likelihood of
780 // being able to use a sign-extended element value in VECTOR REPLICATE
781 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
782 uint64_t SplatBitsZ = SplatBits.getZExtValue();
783 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
784 uint64_t Lower =
785 (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
12. Calling 'findFirstSet<unsigned long>'
19. Returning from 'findFirstSet<unsigned long>'
20. The result of the left shift is undefined due to shifting by '18446744073709551615', which is greater or equal to the width of type 'uint64_t'
786 uint64_t Upper =
787 (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
788 if (tryValue(SplatBitsZ | Upper | Lower))
789 return true;
790
791 // Now try assuming that any undefined bits between the first and
792 // last defined set bits are set. This increases the chances of
793 // using a non-wraparound mask.
794 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
795 return tryValue(SplatBitsZ | Middle);
796}
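One possible way to address the warning, assuming 'SplatBitsZ' can really be zero on some path (for example when every defined bit of the splat is zero while some bits are undefined), is to guard the two shift-based masks so 'findFirstSet'/'findLastSet' never feed an out-of-range amount into the shifts. This is only a sketch of lines 782-787; it is not necessarily the fix the SystemZ maintainers would choose.

// Hypothetical guarded variant of lines 782-787.
uint64_t SplatBitsZ = SplatBits.getZExtValue();
uint64_t SplatUndefZ = SplatUndef.getZExtValue();
uint64_t Lower = 0, Upper = 0;
if (SplatBitsZ != 0) {
  // Safe: SplatBitsZ has at least one set bit, so both shift amounts are < 64.
  Lower = SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1);
  Upper = SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1);
}
if (tryValue(SplatBitsZ | Upper | Lower))
  return true;

For non-zero 'SplatBitsZ' the observable result is unchanged; the guard only affects the zero case that the analyzer flags.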
797
798SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) {
799 if (IntImm.isSingleWord()) {
800 IntBits = APInt(128, IntImm.getZExtValue());
801 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
802 } else
803 IntBits = IntImm;
 804 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
805
806 // Find the smallest splat.
807 SplatBits = IntImm;
808 unsigned Width = SplatBits.getBitWidth();
809 while (Width > 8) {
810 unsigned HalfSize = Width / 2;
811 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
812 APInt LowValue = SplatBits.trunc(HalfSize);
813
814 // If the two halves do not match, stop here.
815 if (HighValue != LowValue || 8 > HalfSize)
816 break;
817
818 SplatBits = HighValue;
819 Width = HalfSize;
820 }
821 SplatUndef = 0;
822 SplatBitSize = Width;
823}
824
825SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
 826 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
827 bool HasAnyUndefs;
828
829 // Get IntBits by finding the 128 bit splat.
830 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
831 true);
832
833 // Get SplatBits by finding the 8 bit or greater splat.
834 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
835 true);
836}
837
838bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
839 bool ForCodeSize) const {
840 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
841 if (Imm.isZero() || Imm.isNegZero())
1. Taking false branch
842 return true;
843
844 return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
2. Calling 'SystemZVectorConstantInfo::isVectorConstantLegal'
845}
846
847/// Returns true if stack probing through inline assembly is requested.
848bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
849 // If the function specifically requests inline stack probes, emit them.
850 if (MF.getFunction().hasFnAttribute("probe-stack"))
851 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
852 "inline-asm";
853 return false;
854}
855
856bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
857 // We can use CGFI or CLGFI.
858 return isInt<32>(Imm) || isUInt<32>(Imm);
859}
860
861bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
862 // We can use ALGFI or SLGFI.
863 return isUInt<32>(Imm) || isUInt<32>(-Imm);
864}
865
866bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
867 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
868 // Unaligned accesses should never be slower than the expanded version.
869 // We check specifically for aligned accesses in the few cases where
870 // they are required.
871 if (Fast)
872 *Fast = 1;
873 return true;
874}
875
876// Information about the addressing mode for a memory access.
877struct AddressingMode {
878 // True if a long displacement is supported.
879 bool LongDisplacement;
880
881 // True if use of index register is supported.
882 bool IndexReg;
883
884 AddressingMode(bool LongDispl, bool IdxReg) :
885 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
886};
887
888// Return the desired addressing mode for a Load which has only one use (in
889// the same block) which is a Store.
890static AddressingMode getLoadStoreAddrMode(bool HasVector,
891 Type *Ty) {
892 // With vector support a Load->Store combination may be combined to either
893 // an MVC or vector operations and it seems to work best to allow the
894 // vector addressing mode.
895 if (HasVector)
896 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
897
898 // Otherwise only the MVC case is special.
899 bool MVC = Ty->isIntegerTy(8);
900 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
901}
902
903// Return the addressing mode which seems most desirable given an LLVM
904// Instruction pointer.
905static AddressingMode
906supportedAddressingMode(Instruction *I, bool HasVector) {
907 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
908 switch (II->getIntrinsicID()) {
909 default: break;
910 case Intrinsic::memset:
911 case Intrinsic::memmove:
912 case Intrinsic::memcpy:
913 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
914 }
915 }
916
917 if (isa<LoadInst>(I) && I->hasOneUse()) {
918 auto *SingleUser = cast<Instruction>(*I->user_begin());
919 if (SingleUser->getParent() == I->getParent()) {
920 if (isa<ICmpInst>(SingleUser)) {
921 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
922 if (C->getBitWidth() <= 64 &&
923 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
924 // Comparison of memory with 16 bit signed / unsigned immediate
925 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
926 } else if (isa<StoreInst>(SingleUser))
927 // Load->Store
928 return getLoadStoreAddrMode(HasVector, I->getType());
929 }
930 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
931 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
932 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
933 // Load->Store
934 return getLoadStoreAddrMode(HasVector, LoadI->getType());
935 }
936
937 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
938
939 // * Use LDE instead of LE/LEY for z13 to avoid partial register
940 // dependencies (LDE only supports small offsets).
941 // * Utilize the vector registers to hold floating point
942 // values (vector load / store instructions only support small
943 // offsets).
944
945 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
946 I->getOperand(0)->getType());
947 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
948 bool IsVectorAccess = MemAccessTy->isVectorTy();
949
950 // A store of an extracted vector element will be combined into a VSTE type
951 // instruction.
952 if (!IsVectorAccess && isa<StoreInst>(I)) {
953 Value *DataOp = I->getOperand(0);
954 if (isa<ExtractElementInst>(DataOp))
955 IsVectorAccess = true;
956 }
957
958 // A load which gets inserted into a vector element will be combined into a
959 // VLE type instruction.
960 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
961 User *LoadUser = *I->user_begin();
962 if (isa<InsertElementInst>(LoadUser))
963 IsVectorAccess = true;
964 }
965
966 if (IsFPAccess || IsVectorAccess)
967 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
968 }
969
970 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
971}
972
973bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
974 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
975 // Punt on globals for now, although they can be used in limited
976 // RELATIVE LONG cases.
977 if (AM.BaseGV)
978 return false;
979
980 // Require a 20-bit signed offset.
981 if (!isInt<20>(AM.BaseOffs))
982 return false;
983
984 bool RequireD12 = Subtarget.hasVector() && Ty->isVectorTy();
985 AddressingMode SupportedAM(!RequireD12, true);
986 if (I != nullptr)
987 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
988
989 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
990 return false;
991
992 if (!SupportedAM.IndexReg)
993 // No indexing allowed.
994 return AM.Scale == 0;
995 else
996 // Indexing is OK but no scale factor can be applied.
997 return AM.Scale == 0 || AM.Scale == 1;
998}
999
1000bool SystemZTargetLowering::findOptimalMemOpLowering(
1001 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
1002 unsigned SrcAS, const AttributeList &FuncAttributes) const {
1003 const int MVCFastLen = 16;
1004
1005 if (Limit != ~unsigned(0)) {
1006 // Don't expand Op into scalar loads/stores in these cases:
1007 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1008 return false; // Small memcpy: Use MVC
1009 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1010 return false; // Small memset (first byte with STC/MVI): Use MVC
1011 if (Op.isZeroMemset())
1012 return false; // Memset zero: Use XC
1013 }
1014
1015 return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
1016 SrcAS, FuncAttributes);
1017}
1018
1019EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op,
1020 const AttributeList &FuncAttributes) const {
1021 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1022}
1023
1024bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1025 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1026 return false;
1027 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1028 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1029 return FromBits > ToBits;
1030}
1031
1032bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
1033 if (!FromVT.isInteger() || !ToVT.isInteger())
1034 return false;
1035 unsigned FromBits = FromVT.getFixedSizeInBits();
1036 unsigned ToBits = ToVT.getFixedSizeInBits();
1037 return FromBits > ToBits;
1038}
1039
1040//===----------------------------------------------------------------------===//
1041// Inline asm support
1042//===----------------------------------------------------------------------===//
1043
1044TargetLowering::ConstraintType
1045SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
1046 if (Constraint.size() == 1) {
1047 switch (Constraint[0]) {
1048 case 'a': // Address register
1049 case 'd': // Data register (equivalent to 'r')
1050 case 'f': // Floating-point register
1051 case 'h': // High-part register
1052 case 'r': // General-purpose register
1053 case 'v': // Vector register
1054 return C_RegisterClass;
1055
1056 case 'Q': // Memory with base and unsigned 12-bit displacement
1057 case 'R': // Likewise, plus an index
1058 case 'S': // Memory with base and signed 20-bit displacement
1059 case 'T': // Likewise, plus an index
1060 case 'm': // Equivalent to 'T'.
1061 return C_Memory;
1062
1063 case 'I': // Unsigned 8-bit constant
1064 case 'J': // Unsigned 12-bit constant
1065 case 'K': // Signed 16-bit constant
1066 case 'L': // Signed 20-bit displacement (on all targets we support)
1067 case 'M': // 0x7fffffff
1068 return C_Immediate;
1069
1070 default:
1071 break;
1072 }
1073 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1074 switch (Constraint[1]) {
1075 case 'Q': // Address with base and unsigned 12-bit displacement
1076 case 'R': // Likewise, plus an index
1077 case 'S': // Address with base and signed 20-bit displacement
1078 case 'T': // Likewise, plus an index
1079 return C_Address;
1080
1081 default:
1082 break;
1083 }
1084 }
1085 return TargetLowering::getConstraintType(Constraint);
1086}
1087
1088TargetLowering::ConstraintWeight SystemZTargetLowering::
1089getSingleConstraintMatchWeight(AsmOperandInfo &info,
1090 const char *constraint) const {
1091 ConstraintWeight weight = CW_Invalid;
1092 Value *CallOperandVal = info.CallOperandVal;
1093 // If we don't have a value, we can't do a match,
1094 // but allow it at the lowest weight.
1095 if (!CallOperandVal)
1096 return CW_Default;
1097 Type *type = CallOperandVal->getType();
1098 // Look at the constraint type.
1099 switch (*constraint) {
1100 default:
1101 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
1102 break;
1103
1104 case 'a': // Address register
1105 case 'd': // Data register (equivalent to 'r')
1106 case 'h': // High-part register
1107 case 'r': // General-purpose register
1108 if (CallOperandVal->getType()->isIntegerTy())
1109 weight = CW_Register;
1110 break;
1111
1112 case 'f': // Floating-point register
1113 if (type->isFloatingPointTy())
1114 weight = CW_Register;
1115 break;
1116
1117 case 'v': // Vector register
1118 if ((type->isVectorTy() || type->isFloatingPointTy()) &&
1119 Subtarget.hasVector())
1120 weight = CW_Register;
1121 break;
1122
1123 case 'I': // Unsigned 8-bit constant
1124 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1125 if (isUInt<8>(C->getZExtValue()))
1126 weight = CW_Constant;
1127 break;
1128
1129 case 'J': // Unsigned 12-bit constant
1130 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1131 if (isUInt<12>(C->getZExtValue()))
1132 weight = CW_Constant;
1133 break;
1134
1135 case 'K': // Signed 16-bit constant
1136 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1137 if (isInt<16>(C->getSExtValue()))
1138 weight = CW_Constant;
1139 break;
1140
1141 case 'L': // Signed 20-bit displacement (on all targets we support)
1142 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1143 if (isInt<20>(C->getSExtValue()))
1144 weight = CW_Constant;
1145 break;
1146
1147 case 'M': // 0x7fffffff
1148 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1149 if (C->getZExtValue() == 0x7fffffff)
1150 weight = CW_Constant;
1151 break;
1152 }
1153 return weight;
1154}
1155
1156// Parse a "{tNNN}" register constraint for which the register type "t"
1157// has already been verified. MC is the class associated with "t" and
1158// Map maps 0-based register numbers to LLVM register numbers.
1159static std::pair<unsigned, const TargetRegisterClass *>
1160parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
1161 const unsigned *Map, unsigned Size) {
 1162 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1163 if (isdigit(Constraint[2])) {
1164 unsigned Index;
1165 bool Failed =
1166 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1167 if (!Failed && Index < Size && Map[Index])
1168 return std::make_pair(Map[Index], RC);
1169 }
1170 return std::make_pair(0U, nullptr);
1171}
1172
1173std::pair<unsigned, const TargetRegisterClass *>
1174SystemZTargetLowering::getRegForInlineAsmConstraint(
1175 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1176 if (Constraint.size() == 1) {
1177 // GCC Constraint Letters
1178 switch (Constraint[0]) {
1179 default: break;
1180 case 'd': // Data register (equivalent to 'r')
1181 case 'r': // General-purpose register
1182 if (VT == MVT::i64)
1183 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1184 else if (VT == MVT::i128)
1185 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1186 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1187
1188 case 'a': // Address register
1189 if (VT == MVT::i64)
1190 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1191 else if (VT == MVT::i128)
1192 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1193 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1194
1195 case 'h': // High-part register (an LLVM extension)
1196 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1197
1198 case 'f': // Floating-point register
1199 if (!useSoftFloat()) {
1200 if (VT == MVT::f64)
1201 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1202 else if (VT == MVT::f128)
1203 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1204 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1205 }
1206 break;
1207 case 'v': // Vector register
1208 if (Subtarget.hasVector()) {
1209 if (VT == MVT::f32)
1210 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1211 if (VT == MVT::f64)
1212 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1213 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1214 }
1215 break;
1216 }
1217 }
1218 if (Constraint.size() > 0 && Constraint[0] == '{') {
1219 // We need to override the default register parsing for GPRs and FPRs
1220 // because the interpretation depends on VT. The internal names of
1221 // the registers are also different from the external names
1222 // (F0D and F0S instead of F0, etc.).
1223 if (Constraint[1] == 'r') {
1224 if (VT == MVT::i32)
1225 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1226 SystemZMC::GR32Regs, 16);
1227 if (VT == MVT::i128)
1228 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1229 SystemZMC::GR128Regs, 16);
1230 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1231 SystemZMC::GR64Regs, 16);
1232 }
1233 if (Constraint[1] == 'f') {
1234 if (useSoftFloat())
1235 return std::make_pair(
1236 0u, static_cast<const TargetRegisterClass *>(nullptr));
1237 if (VT == MVT::f32)
1238 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1239 SystemZMC::FP32Regs, 16);
1240 if (VT == MVT::f128)
1241 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1242 SystemZMC::FP128Regs, 16);
1243 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1244 SystemZMC::FP64Regs, 16);
1245 }
1246 if (Constraint[1] == 'v') {
1247 if (!Subtarget.hasVector())
1248 return std::make_pair(
1249 0u, static_cast<const TargetRegisterClass *>(nullptr));
1250 if (VT == MVT::f32)
1251 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1252 SystemZMC::VR32Regs, 32);
1253 if (VT == MVT::f64)
1254 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1255 SystemZMC::VR64Regs, 32);
1256 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1257 SystemZMC::VR128Regs, 32);
1258 }
1259 }
1260 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1261}
1262
1263// FIXME? Maybe this could be a TableGen attribute on some registers and
1264// this table could be generated automatically from RegInfo.
1265Register
1266SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
1267 const MachineFunction &MF) const {
1268 const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
1269
1270 Register Reg =
1271 StringSwitch<Register>(RegName)
1272 .Case("r4", Subtarget->isTargetXPLINK64() ? SystemZ::R4D : 0)
1273 .Case("r15", Subtarget->isTargetELF() ? SystemZ::R15D : 0)
1274 .Default(0);
1275
1276 if (Reg)
1277 return Reg;
1278 report_fatal_error("Invalid register name global variable");
1279}
1280
1281void SystemZTargetLowering::
1282LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1283 std::vector<SDValue> &Ops,
1284 SelectionDAG &DAG) const {
1285 // Only support length 1 constraints for now.
1286 if (Constraint.length() == 1) {
1287 switch (Constraint[0]) {
1288 case 'I': // Unsigned 8-bit constant
1289 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1290 if (isUInt<8>(C->getZExtValue()))
1291 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1292 Op.getValueType()));
1293 return;
1294
1295 case 'J': // Unsigned 12-bit constant
1296 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1297 if (isUInt<12>(C->getZExtValue()))
1298 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1299 Op.getValueType()));
1300 return;
1301
1302 case 'K': // Signed 16-bit constant
1303 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1304 if (isInt<16>(C->getSExtValue()))
1305 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1306 Op.getValueType()));
1307 return;
1308
1309 case 'L': // Signed 20-bit displacement (on all targets we support)
1310 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1311 if (isInt<20>(C->getSExtValue()))
1312 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1313 Op.getValueType()));
1314 return;
1315
1316 case 'M': // 0x7fffffff
1317 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1318 if (C->getZExtValue() == 0x7fffffff)
1319 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1320 Op.getValueType()));
1321 return;
1322 }
1323 }
1324 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1325}
1326
1327//===----------------------------------------------------------------------===//
1328// Calling conventions
1329//===----------------------------------------------------------------------===//
1330
1331#include "SystemZGenCallingConv.inc"
1332
1333const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
1334 CallingConv::ID) const {
1335 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1336 SystemZ::R14D, 0 };
1337 return ScratchRegs;
1338}
1339
1340bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
1341 Type *ToType) const {
1342 return isTruncateFree(FromType, ToType);
1343}
1344
1345bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1346 return CI->isTailCall();
1347}
1348
1349// We do not yet support 128-bit single-element vector types. If the user
1350// attempts to use such types as function argument or return type, prefer
1351// to error out instead of emitting code violating the ABI.
1352static void VerifyVectorType(MVT VT, EVT ArgVT) {
1353 if (ArgVT.isVector() && !VT.isVector())
1354 report_fatal_error("Unsupported vector argument or return type");
1355}
1356
1357static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
1358 for (unsigned i = 0; i < Ins.size(); ++i)
1359 VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1360}
1361
1362static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
1363 for (unsigned i = 0; i < Outs.size(); ++i)
1364 VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1365}
1366
1367// Value is a value that has been passed to us in the location described by VA
1368// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1369// any loads onto Chain.
1370static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
1371 CCValAssign &VA, SDValue Chain,
1372 SDValue Value) {
1373 // If the argument has been promoted from a smaller type, insert an
1374 // assertion to capture this.
1375 if (VA.getLocInfo() == CCValAssign::SExt)
1376 Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1377 DAG.getValueType(VA.getValVT()));
1378 else if (VA.getLocInfo() == CCValAssign::ZExt)
1379 Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1380 DAG.getValueType(VA.getValVT()));
1381
1382 if (VA.isExtInLoc())
1383 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1384 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1385 // If this is a short vector argument loaded from the stack,
1386 // extend from i64 to full vector size and then bitcast.
1387 assert(VA.getLocVT() == MVT::i64);
1388 assert(VA.getValVT().isVector());
1389 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1390 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1391 } else
1392 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1393 return Value;
1394}
1395
1396// Value is a value of type VA.getValVT() that we need to copy into
1397// the location described by VA. Return a copy of Value converted to
1398 // VA.getLocVT(). The caller is responsible for handling indirect values.
1399static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
1400 CCValAssign &VA, SDValue Value) {
1401 switch (VA.getLocInfo()) {
1402 case CCValAssign::SExt:
1403 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1404 case CCValAssign::ZExt:
1405 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1406 case CCValAssign::AExt:
1407 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1408 case CCValAssign::BCvt: {
1409 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1410 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1411 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1412 // For an f32 vararg we need to first promote it to an f64 and then
1413 // bitcast it to an i64.
1414 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1415 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1416 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1417 ? MVT::v2i64
1418 : VA.getLocVT();
1419 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1420 // For ELF, this is a short vector argument to be stored to the stack,
1421 // bitcast to v2i64 and then extract first element.
1422 if (BitCastToType == MVT::v2i64)
1423 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1424 DAG.getConstant(0, DL, MVT::i32));
1425 return Value;
1426 }
1427 case CCValAssign::Full:
1428 return Value;
1429 default:
1430 llvm_unreachable("Unhandled getLocInfo()");
1431 }
1432}
1433
1434static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
1435 SDLoc DL(In);
1436 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
1437 DAG.getIntPtrConstant(0, DL));
1438 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
1439 DAG.getIntPtrConstant(1, DL));
1440 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1441 MVT::Untyped, Hi, Lo);
1442 return SDValue(Pair, 0);
1443}
1444
1445static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
1446 SDLoc DL(In);
1447 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1448 DL, MVT::i64, In);
1449 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1450 DL, MVT::i64, In);
1451 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1452}
1453
1454bool SystemZTargetLowering::splitValueIntoRegisterParts(
1455 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1456 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1457 EVT ValueVT = Val.getValueType();
1458 assert((ValueVT != MVT::i128 ||
1459 ((NumParts == 1 && PartVT == MVT::Untyped) ||
1460 (NumParts == 2 && PartVT == MVT::i64))) &&
1461 "Unknown handling of i128 value.");
1462 if (ValueVT == MVT::i128 && NumParts == 1) {
1463 // Inline assembly operand.
1464 Parts[0] = lowerI128ToGR128(DAG, Val);
1465 return true;
1466 }
1467 return false;
1468}
1469
1470SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
1471 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1472 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1473 assert((ValueVT != MVT::i128 ||
1474 ((NumParts == 1 && PartVT == MVT::Untyped) ||
1475 (NumParts == 2 && PartVT == MVT::i64))) &&
1476 "Unknown handling of i128 value.");
1477 if (ValueVT == MVT::i128 && NumParts == 1)
1478 // Inline assembly operand.
1479 return lowerGR128ToI128(DAG, Parts[0]);
1480 return SDValue();
1481}
1482
1483SDValue SystemZTargetLowering::LowerFormalArguments(
1484 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1485 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1486 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1487 MachineFunction &MF = DAG.getMachineFunction();
1488 MachineFrameInfo &MFI = MF.getFrameInfo();
1489 MachineRegisterInfo &MRI = MF.getRegInfo();
1490 SystemZMachineFunctionInfo *FuncInfo =
1491 MF.getInfo<SystemZMachineFunctionInfo>();
1492 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1493 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1494
1495 // Detect unsupported vector argument types.
1496 if (Subtarget.hasVector())
1497 VerifyVectorTypes(Ins);
1498
1499 // Assign locations to all of the incoming arguments.
1500 SmallVector<CCValAssign, 16> ArgLocs;
1501 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1502 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1503
1504 unsigned NumFixedGPRs = 0;
1505 unsigned NumFixedFPRs = 0;
1506 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1507 SDValue ArgValue;
1508 CCValAssign &VA = ArgLocs[I];
1509 EVT LocVT = VA.getLocVT();
1510 if (VA.isRegLoc()) {
1511 // Arguments passed in registers
1512 const TargetRegisterClass *RC;
1513 switch (LocVT.getSimpleVT().SimpleTy) {
1514 default:
1515 // Integers smaller than i64 should be promoted to i64.
1516 llvm_unreachable("Unexpected argument type");
1517 case MVT::i32:
1518 NumFixedGPRs += 1;
1519 RC = &SystemZ::GR32BitRegClass;
1520 break;
1521 case MVT::i64:
1522 NumFixedGPRs += 1;
1523 RC = &SystemZ::GR64BitRegClass;
1524 break;
1525 case MVT::f32:
1526 NumFixedFPRs += 1;
1527 RC = &SystemZ::FP32BitRegClass;
1528 break;
1529 case MVT::f64:
1530 NumFixedFPRs += 1;
1531 RC = &SystemZ::FP64BitRegClass;
1532 break;
1533 case MVT::f128:
1534 NumFixedFPRs += 2;
1535 RC = &SystemZ::FP128BitRegClass;
1536 break;
1537 case MVT::v16i8:
1538 case MVT::v8i16:
1539 case MVT::v4i32:
1540 case MVT::v2i64:
1541 case MVT::v4f32:
1542 case MVT::v2f64:
1543 RC = &SystemZ::VR128BitRegClass;
1544 break;
1545 }
1546
1547 Register VReg = MRI.createVirtualRegister(RC);
1548 MRI.addLiveIn(VA.getLocReg(), VReg);
1549 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1550 } else {
1551 assert(VA.isMemLoc() && "Argument not register or memory");
1552
1553 // Create the frame index object for this incoming parameter.
1554 // FIXME: Pre-include call frame size in the offset, should not
1555 // need to manually add it here.
1556 int64_t ArgSPOffset = VA.getLocMemOffset();
1557 if (Subtarget.isTargetXPLINK64()) {
1558 auto &XPRegs =
1559 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
1560 ArgSPOffset += XPRegs.getCallFrameSize();
1561 }
1562 int FI =
1563 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1564
1565 // Create the SelectionDAG nodes corresponding to a load
1566 // from this parameter. Unpromoted ints and floats are
1567 // passed as right-justified 8-byte values.
1568 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1569 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1570 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1571 DAG.getIntPtrConstant(4, DL));
1572 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1573 MachinePointerInfo::getFixedStack(MF, FI));
1574 }
1575
1576 // Convert the value of the argument register into the value that's
1577 // being passed.
1578 if (VA.getLocInfo() == CCValAssign::Indirect) {
1579 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1580 MachinePointerInfo()));
1581 // If the original argument was split (e.g. i128), we need
1582 // to load all parts of it here (using the same address).
1583 unsigned ArgIndex = Ins[I].OrigArgIndex;
1584 assert (Ins[I].PartOffset == 0);
1585 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1586 CCValAssign &PartVA = ArgLocs[I + 1];
1587 unsigned PartOffset = Ins[I + 1].PartOffset;
1588 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1589 DAG.getIntPtrConstant(PartOffset, DL));
1590 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1591 MachinePointerInfo()));
1592 ++I;
1593 }
1594 } else
1595 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1596 }
1597
1598 // FIXME: Add support for lowering varargs for XPLINK64 in a later patch.
1599 if (IsVarArg && Subtarget.isTargetELF()) {
1600 // Save the number of non-varargs registers for later use by va_start, etc.
1601 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1602 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1603
1604 // Likewise the address (in the form of a frame index) of where the
1605 // first stack vararg would be. The 1-byte size here is arbitrary.
1606 int64_t StackSize = CCInfo.getNextStackOffset();
1607 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
1608
1609 // ...and a similar frame index for the caller-allocated save area
1610 // that will be used to store the incoming registers.
1611 int64_t RegSaveOffset =
1612 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1613 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1614 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1615
1616 // Store the FPR varargs in the reserved frame slots. (We store the
1617 // GPRs as part of the prologue.)
1618 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1619 SDValue MemOps[SystemZ::ELFNumArgFPRs];
1620 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1621 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1622 int FI =
1623 MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
1624 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1625 Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
1626 &SystemZ::FP64BitRegClass);
1627 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1628 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1629 MachinePointerInfo::getFixedStack(MF, FI));
1630 }
1631 // Join the stores, which are independent of one another.
1632 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1633 ArrayRef(&MemOps[NumFixedFPRs],
1634 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
1635 }
1636 }
1637
1638 // FIXME: For XPLINK64, Add in support for handling incoming "ADA" special
1639 // register (R5)
1640 return Chain;
1641}
1642
1643static bool canUseSiblingCall(const CCState &ArgCCInfo,
1644 SmallVectorImpl<CCValAssign> &ArgLocs,
1645 SmallVectorImpl<ISD::OutputArg> &Outs) {
1646 // Punt if there are any indirect or stack arguments, or if the call
1647 // needs the callee-saved argument register R6, or if the call uses
1648 // the callee-saved register arguments SwiftSelf and SwiftError.
1649 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1650 CCValAssign &VA = ArgLocs[I];
1651 if (VA.getLocInfo() == CCValAssign::Indirect)
1652 return false;
1653 if (!VA.isRegLoc())
1654 return false;
1655 Register Reg = VA.getLocReg();
1656 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1657 return false;
1658 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1659 return false;
1660 }
1661 return true;
1662}
1663
1664SDValue
1665SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
1666 SmallVectorImpl<SDValue> &InVals) const {
1667 SelectionDAG &DAG = CLI.DAG;
1668 SDLoc &DL = CLI.DL;
1669 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1670 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1671 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1672 SDValue Chain = CLI.Chain;
1673 SDValue Callee = CLI.Callee;
1674 bool &IsTailCall = CLI.IsTailCall;
1675 CallingConv::ID CallConv = CLI.CallConv;
1676 bool IsVarArg = CLI.IsVarArg;
1677 MachineFunction &MF = DAG.getMachineFunction();
1678 EVT PtrVT = getPointerTy(MF.getDataLayout());
1679 LLVMContext &Ctx = *DAG.getContext();
1680 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
1681
1682 // FIXME: z/OS support to be added in later.
1683 if (Subtarget.isTargetXPLINK64())
1684 IsTailCall = false;
1685
1686 // Detect unsupported vector argument and return types.
1687 if (Subtarget.hasVector()) {
1688 VerifyVectorTypes(Outs);
1689 VerifyVectorTypes(Ins);
1690 }
1691
1692 // Analyze the operands of the call, assigning locations to each operand.
1693 SmallVector<CCValAssign, 16> ArgLocs;
1694 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1695 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1696
1697 // We don't support GuaranteedTailCallOpt, only automatically-detected
1698 // sibling calls.
1699 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1700 IsTailCall = false;
1701
1702 // Get a count of how many bytes are to be pushed on the stack.
1703 unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1704
1705 if (Subtarget.isTargetXPLINK64())
1706 // Although the XPLINK specifications for AMODE64 state that the minimum
1707 // size of the param area is 32 bytes and no rounding is otherwise
1708 // specified, we round this area up in 64-byte increments to be compatible
1709 // with existing compilers.
1710 NumBytes = std::max(64U, (unsigned)alignTo(NumBytes, 64));
1711
1712 // Mark the start of the call.
1713 if (!IsTailCall)
1714 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1715
1716 // Copy argument values to their designated locations.
1717 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
1718 SmallVector<SDValue, 8> MemOpChains;
1719 SDValue StackPtr;
1720 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1721 CCValAssign &VA = ArgLocs[I];
1722 SDValue ArgValue = OutVals[I];
1723
1724 if (VA.getLocInfo() == CCValAssign::Indirect) {
1725 // Store the argument in a stack slot and pass its address.
1726 unsigned ArgIndex = Outs[I].OrigArgIndex;
1727 EVT SlotVT;
1728 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1729 // Allocate the full stack space for a promoted (and split) argument.
1730 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1731 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1732 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1733 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1734 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1735 } else {
1736 SlotVT = Outs[I].ArgVT;
1737 }
1738 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1739 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1740 MemOpChains.push_back(
1741 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1742 MachinePointerInfo::getFixedStack(MF, FI)));
1743 // If the original argument was split (e.g. i128), we need
1744 // to store all parts of it here (and pass just one address).
1745 assert (Outs[I].PartOffset == 0);
1746 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1747 SDValue PartValue = OutVals[I + 1];
1748 unsigned PartOffset = Outs[I + 1].PartOffset;
1749 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1750 DAG.getIntPtrConstant(PartOffset, DL));
1751 MemOpChains.push_back(
1752 DAG.getStore(Chain, DL, PartValue, Address,
1753 MachinePointerInfo::getFixedStack(MF, FI)));
1754 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1755 SlotVT.getStoreSize()) && "Not enough space for argument part!");
1756 ++I;
1757 }
1758 ArgValue = SpillSlot;
1759 } else
1760 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1761
1762 if (VA.isRegLoc()) {
1763 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
1764 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
1765 // and low values.
1766 if (VA.getLocVT() == MVT::i128)
1767 ArgValue = lowerI128ToGR128(DAG, ArgValue);
1768 // Queue up the argument copies and emit them at the end.
1769 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1770 } else {
1771 assert(VA.isMemLoc() && "Argument not register or memory");
1772
1773 // Work out the address of the stack slot. Unpromoted ints and
1774 // floats are passed as right-justified 8-byte values.
1775 if (!StackPtr.getNode())
1776 StackPtr = DAG.getCopyFromReg(Chain, DL,
1777 Regs->getStackPointerRegister(), PtrVT);
1778 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
1779 VA.getLocMemOffset();
1780 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1781 Offset += 4;
1782 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1783 DAG.getIntPtrConstant(Offset, DL));
1784
1785 // Emit the store.
1786 MemOpChains.push_back(
1787 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1788
1789 // Although long doubles or vectors are passed through the stack when
1790 // they are vararg (non-fixed arguments), if a long double or vector
1791 // occupies the third and fourth slot of the argument list GPR3 should
1792 // still shadow the third slot of the argument list.
1793 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
1794 SDValue ShadowArgValue =
1795 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
1796 DAG.getIntPtrConstant(1, DL));
1797 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
1798 }
1799 }
1800 }
1801
1802 // Join the stores, which are independent of one another.
1803 if (!MemOpChains.empty())
1804 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1805
1806 // Accept direct calls by converting symbolic call addresses to the
1807 // associated Target* opcodes. Force %r1 to be used for indirect
1808 // tail calls.
1809 SDValue Glue;
1810 // FIXME: Add support for XPLINK using the ADA register.
1811 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1812 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1813 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1814 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1815 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1816 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1817 } else if (IsTailCall) {
1818 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1819 Glue = Chain.getValue(1);
1820 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1821 }
1822
1823 // Build a sequence of copy-to-reg nodes, chained and glued together.
1824 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1825 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1826 RegsToPass[I].second, Glue);
1827 Glue = Chain.getValue(1);
1828 }
1829
1830 // The first call operand is the chain and the second is the target address.
1831 SmallVector<SDValue, 8> Ops;
1832 Ops.push_back(Chain);
1833 Ops.push_back(Callee);
1834
1835 // Add argument registers to the end of the list so that they are
1836 // known live into the call.
1837 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1838 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1839 RegsToPass[I].second.getValueType()));
1840
1841 // Add a register mask operand representing the call-preserved registers.
1842 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1843 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1844 assert(Mask && "Missing call preserved mask for calling convention");
1845 Ops.push_back(DAG.getRegisterMask(Mask));
1846
1847 // Glue the call to the argument copies, if any.
1848 if (Glue.getNode())
1849 Ops.push_back(Glue);
1850
1851 // Emit the call.
1852 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1853 if (IsTailCall)
1854 return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
1855 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
1856 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
1857 Glue = Chain.getValue(1);
1858
1859 // Mark the end of the call, which is glued to the call itself.
1860 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
1861 Glue = Chain.getValue(1);
1862
1863 // Assign locations to each value returned by this call.
1864 SmallVector<CCValAssign, 16> RetLocs;
1865 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
1866 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
1867
1868 // Copy all of the result registers out of their specified physreg.
1869 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1870 CCValAssign &VA = RetLocs[I];
1871
1872 // Copy the value out, gluing the copy to the end of the call sequence.
1873 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
1874 VA.getLocVT(), Glue);
1875 Chain = RetValue.getValue(1);
1876 Glue = RetValue.getValue(2);
1877
1878 // Convert the value of the return register into the value that's
1879 // being returned.
1880 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
1881 }
1882
1883 return Chain;
1884}
1885
1886// Generate a call taking the given operands as arguments and returning a
1887// result of type RetVT.
1888std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
1889 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
1890 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
1891 bool DoesNotReturn, bool IsReturnValueUsed) const {
1892 TargetLowering::ArgListTy Args;
1893 Args.reserve(Ops.size());
1894
1895 TargetLowering::ArgListEntry Entry;
1896 for (SDValue Op : Ops) {
1897 Entry.Node = Op;
1898 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1899 Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
1900 Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
1901 Args.push_back(Entry);
1902 }
1903
1904 SDValue Callee =
1905 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
1906
1907 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
1908 TargetLowering::CallLoweringInfo CLI(DAG);
1909 bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
1910 CLI.setDebugLoc(DL)
1911 .setChain(Chain)
1912 .setCallee(CallConv, RetTy, Callee, std::move(Args))
1913 .setNoReturn(DoesNotReturn)
1914 .setDiscardResult(!IsReturnValueUsed)
1915 .setSExtResult(SignExtend)
1916 .setZExtResult(!SignExtend);
1917 return LowerCallTo(CLI);
1918}
1919
1920bool SystemZTargetLowering::
1921CanLowerReturn(CallingConv::ID CallConv,
1922 MachineFunction &MF, bool isVarArg,
1923 const SmallVectorImpl<ISD::OutputArg> &Outs,
1924 LLVMContext &Context) const {
1925 // Detect unsupported vector return types.
1926 if (Subtarget.hasVector())
1927 VerifyVectorTypes(Outs);
1928
1929 // Special case that we cannot easily detect in RetCC_SystemZ since
1930 // i128 is not a legal type.
1931 for (auto &Out : Outs)
1932 if (Out.ArgVT == MVT::i128)
1933 return false;
1934
1935 SmallVector<CCValAssign, 16> RetLocs;
1936 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1937 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1938}
1939
1940SDValue
1941SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1942 bool IsVarArg,
1943 const SmallVectorImpl<ISD::OutputArg> &Outs,
1944 const SmallVectorImpl<SDValue> &OutVals,
1945 const SDLoc &DL, SelectionDAG &DAG) const {
1946 MachineFunction &MF = DAG.getMachineFunction();
1947
1948 // Detect unsupported vector return types.
1949 if (Subtarget.hasVector())
1950 VerifyVectorTypes(Outs);
1951
1952 // Assign locations to each returned value.
1953 SmallVector<CCValAssign, 16> RetLocs;
1954 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1955 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1956
1957 // Quick exit for void returns
1958 if (RetLocs.empty())
1959 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1960
1961 if (CallConv == CallingConv::GHC)
1962 report_fatal_error("GHC functions return void only");
1963
1964 // Copy the result values into the output registers.
1965 SDValue Glue;
1966 SmallVector<SDValue, 4> RetOps;
1967 RetOps.push_back(Chain);
1968 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1969 CCValAssign &VA = RetLocs[I];
1970 SDValue RetValue = OutVals[I];
1971
1972 // Make the return register live on exit.
1973 assert(VA.isRegLoc() && "Can only return in registers!");
1974
1975 // Promote the value as required.
1976 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1977
1978 // Chain and glue the copies together.
1979 Register Reg = VA.getLocReg();
1980 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1981 Glue = Chain.getValue(1);
1982 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1983 }
1984
1985 // Update chain and glue.
1986 RetOps[0] = Chain;
1987 if (Glue.getNode())
1988 RetOps.push_back(Glue);
1989
1990 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
1991}
1992
1993// Return true if Op is an intrinsic node with chain that returns the CC value
1994// as its only (other) argument. Provide the associated SystemZISD opcode and
1995// the mask of valid CC values if so.
1996static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1997 unsigned &CCValid) {
1998 unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1999 switch (Id) {
2000 case Intrinsic::s390_tbegin:
2001 Opcode = SystemZISD::TBEGIN;
2002 CCValid = SystemZ::CCMASK_TBEGIN;
2003 return true;
2004
2005 case Intrinsic::s390_tbegin_nofloat:
2006 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2007 CCValid = SystemZ::CCMASK_TBEGIN;
2008 return true;
2009
2010 case Intrinsic::s390_tend:
2011 Opcode = SystemZISD::TEND;
2012 CCValid = SystemZ::CCMASK_TEND;
2013 return true;
2014
2015 default:
2016 return false;
2017 }
2018}
2019
2020// Return true if Op is an intrinsic node without chain that returns the
2021// CC value as its final argument. Provide the associated SystemZISD
2022// opcode and the mask of valid CC values if so.
2023static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2024 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2025 switch (Id) {
2026 case Intrinsic::s390_vpkshs:
2027 case Intrinsic::s390_vpksfs:
2028 case Intrinsic::s390_vpksgs:
2029 Opcode = SystemZISD::PACKS_CC;
2030 CCValid = SystemZ::CCMASK_VCMP;
2031 return true;
2032
2033 case Intrinsic::s390_vpklshs:
2034 case Intrinsic::s390_vpklsfs:
2035 case Intrinsic::s390_vpklsgs:
2036 Opcode = SystemZISD::PACKLS_CC;
2037 CCValid = SystemZ::CCMASK_VCMP;
2038 return true;
2039
2040 case Intrinsic::s390_vceqbs:
2041 case Intrinsic::s390_vceqhs:
2042 case Intrinsic::s390_vceqfs:
2043 case Intrinsic::s390_vceqgs:
2044 Opcode = SystemZISD::VICMPES;
2045 CCValid = SystemZ::CCMASK_VCMP;
2046 return true;
2047
2048 case Intrinsic::s390_vchbs:
2049 case Intrinsic::s390_vchhs:
2050 case Intrinsic::s390_vchfs:
2051 case Intrinsic::s390_vchgs:
2052 Opcode = SystemZISD::VICMPHS;
2053 CCValid = SystemZ::CCMASK_VCMP;
2054 return true;
2055
2056 case Intrinsic::s390_vchlbs:
2057 case Intrinsic::s390_vchlhs:
2058 case Intrinsic::s390_vchlfs:
2059 case Intrinsic::s390_vchlgs:
2060 Opcode = SystemZISD::VICMPHLS;
2061 CCValid = SystemZ::CCMASK_VCMP;
2062 return true;
2063
2064 case Intrinsic::s390_vtm:
2065 Opcode = SystemZISD::VTM;
2066 CCValid = SystemZ::CCMASK_VCMP;
2067 return true;
2068
2069 case Intrinsic::s390_vfaebs:
2070 case Intrinsic::s390_vfaehs:
2071 case Intrinsic::s390_vfaefs:
2072 Opcode = SystemZISD::VFAE_CC;
2073 CCValid = SystemZ::CCMASK_ANY;
2074 return true;
2075
2076 case Intrinsic::s390_vfaezbs:
2077 case Intrinsic::s390_vfaezhs:
2078 case Intrinsic::s390_vfaezfs:
2079 Opcode = SystemZISD::VFAEZ_CC;
2080 CCValid = SystemZ::CCMASK_ANY;
2081 return true;
2082
2083 case Intrinsic::s390_vfeebs:
2084 case Intrinsic::s390_vfeehs:
2085 case Intrinsic::s390_vfeefs:
2086 Opcode = SystemZISD::VFEE_CC;
2087 CCValid = SystemZ::CCMASK_ANY;
2088 return true;
2089
2090 case Intrinsic::s390_vfeezbs:
2091 case Intrinsic::s390_vfeezhs:
2092 case Intrinsic::s390_vfeezfs:
2093 Opcode = SystemZISD::VFEEZ_CC;
2094 CCValid = SystemZ::CCMASK_ANY;
2095 return true;
2096
2097 case Intrinsic::s390_vfenebs:
2098 case Intrinsic::s390_vfenehs:
2099 case Intrinsic::s390_vfenefs:
2100 Opcode = SystemZISD::VFENE_CC;
2101 CCValid = SystemZ::CCMASK_ANY;
2102 return true;
2103
2104 case Intrinsic::s390_vfenezbs:
2105 case Intrinsic::s390_vfenezhs:
2106 case Intrinsic::s390_vfenezfs:
2107 Opcode = SystemZISD::VFENEZ_CC;
2108 CCValid = SystemZ::CCMASK_ANY;
2109 return true;
2110
2111 case Intrinsic::s390_vistrbs:
2112 case Intrinsic::s390_vistrhs:
2113 case Intrinsic::s390_vistrfs:
2114 Opcode = SystemZISD::VISTR_CC;
2115 CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
2116 return true;
2117
2118 case Intrinsic::s390_vstrcbs:
2119 case Intrinsic::s390_vstrchs:
2120 case Intrinsic::s390_vstrcfs:
2121 Opcode = SystemZISD::VSTRC_CC;
2122 CCValid = SystemZ::CCMASK_ANY;
2123 return true;
2124
2125 case Intrinsic::s390_vstrczbs:
2126 case Intrinsic::s390_vstrczhs:
2127 case Intrinsic::s390_vstrczfs:
2128 Opcode = SystemZISD::VSTRCZ_CC;
2129 CCValid = SystemZ::CCMASK_ANY;
2130 return true;
2131
2132 case Intrinsic::s390_vstrsb:
2133 case Intrinsic::s390_vstrsh:
2134 case Intrinsic::s390_vstrsf:
2135 Opcode = SystemZISD::VSTRS_CC;
2136 CCValid = SystemZ::CCMASK_ANY;
2137 return true;
2138
2139 case Intrinsic::s390_vstrszb:
2140 case Intrinsic::s390_vstrszh:
2141 case Intrinsic::s390_vstrszf:
2142 Opcode = SystemZISD::VSTRSZ_CC;
2143 CCValid = SystemZ::CCMASK_ANY;
2144 return true;
2145
2146 case Intrinsic::s390_vfcedbs:
2147 case Intrinsic::s390_vfcesbs:
2148 Opcode = SystemZISD::VFCMPES;
2149 CCValid = SystemZ::CCMASK_VCMP;
2150 return true;
2151
2152 case Intrinsic::s390_vfchdbs:
2153 case Intrinsic::s390_vfchsbs:
2154 Opcode = SystemZISD::VFCMPHS;
2155 CCValid = SystemZ::CCMASK_VCMP;
2156 return true;
2157
2158 case Intrinsic::s390_vfchedbs:
2159 case Intrinsic::s390_vfchesbs:
2160 Opcode = SystemZISD::VFCMPHES;
2161 CCValid = SystemZ::CCMASK_VCMP;
2162 return true;
2163
2164 case Intrinsic::s390_vftcidb:
2165 case Intrinsic::s390_vftcisb:
2166 Opcode = SystemZISD::VFTCI;
2167 CCValid = SystemZ::CCMASK_VCMP;
2168 return true;
2169
2170 case Intrinsic::s390_tdc:
2171 Opcode = SystemZISD::TDC;
2172 CCValid = SystemZ::CCMASK_TDC;
2173 return true;
2174
2175 default:
2176 return false;
2177 }
2178}
2179
2180// Emit an intrinsic with chain and an explicit CC register result.
2181static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2182 unsigned Opcode) {
2183 // Copy all operands except the intrinsic ID.
2184 unsigned NumOps = Op.getNumOperands();
2185 SmallVector<SDValue, 6> Ops;
2186 Ops.reserve(NumOps - 1);
2187 Ops.push_back(Op.getOperand(0));
2188 for (unsigned I = 2; I < NumOps; ++I)
2189 Ops.push_back(Op.getOperand(I));
2190
2191 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2192 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2193 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2194 SDValue OldChain = SDValue(Op.getNode(), 1);
2195 SDValue NewChain = SDValue(Intr.getNode(), 1);
2196 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2197 return Intr.getNode();
2198}
2199
2200// Emit an intrinsic with an explicit CC register result.
2201static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2202 unsigned Opcode) {
2203 // Copy all operands except the intrinsic ID.
2204 unsigned NumOps = Op.getNumOperands();
2205 SmallVector<SDValue, 6> Ops;
2206 Ops.reserve(NumOps - 1);
2207 for (unsigned I = 1; I < NumOps; ++I)
2208 Ops.push_back(Op.getOperand(I));
2209
2210 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2211 return Intr.getNode();
2212}
2213
2214// CC is a comparison that will be implemented using an integer or
2215// floating-point comparison. Return the condition code mask for
2216// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2217// unsigned comparisons and clear for signed ones. In the floating-point
2218// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2219static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2220#define CONV(X) \
2221 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2222 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2223 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2224
2225 switch (CC) {
2226 default:
2227 llvm_unreachable("Invalid integer condition!");
2228
2229 CONV(EQ);
2230 CONV(NE);
2231 CONV(GT);
2232 CONV(GE);
2233 CONV(LT);
2234 CONV(LE);
2235
2236 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2237 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2238 }
2239#undef CONV
2240}
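// To make the mapping above concrete: under the CONV expansion, ISD::SETGT
// and ISD::SETOGT both yield SystemZ::CCMASK_CMP_GT, while ISD::SETUGT
// yields SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_GT, matching the
// description that CCMASK_CMP_UO is set for unsigned integer comparisons
// and clear for signed ones.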
2241
2242// If C can be converted to a comparison against zero, adjust the operands
2243// as necessary.
2244static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2245 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2246 return;
2247
2248 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2249 if (!ConstOp1)
2250 return;
2251
2252 int64_t Value = ConstOp1->getSExtValue();
2253 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2254 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2255 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2256 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2257 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2258 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2259 }
2260}
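// Worked example: a signed test "x > -1" (Value == -1, CCMask ==
// SystemZ::CCMASK_CMP_GT) has its mask XORed with CCMASK_CMP_EQ, giving
// CCMASK_CMP_GE, and Op1 is replaced with the constant 0, so the
// comparison is rewritten as "x >= 0".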
2261
2262// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2263// adjust the operands as necessary.
2264static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2265 Comparison &C) {
2266 // For us to make any changes, it must be a comparison between a single-use
2267 // load and a constant.
2268 if (!C.Op0.hasOneUse() ||
2269 C.Op0.getOpcode() != ISD::LOAD ||
2270 C.Op1.getOpcode() != ISD::Constant)
2271 return;
2272
2273 // We must have an 8- or 16-bit load.
2274 auto *Load = cast<LoadSDNode>(C.Op0);
2275 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2276 if ((NumBits != 8 && NumBits != 16) ||
2277 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2278 return;
2279
2280 // The load must be an extending one and the constant must be within the
2281 // range of the unextended value.
2282 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2283 uint64_t Value = ConstOp1->getZExtValue();
2284 uint64_t Mask = (1 << NumBits) - 1;
2285 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2286 // Make sure that ConstOp1 is in range of C.Op0.
2287 int64_t SignedValue = ConstOp1->getSExtValue();
2288 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2289 return;
2290 if (C.ICmpType != SystemZICMP::SignedOnly) {
2291 // Unsigned comparison between two sign-extended values is equivalent
2292 // to unsigned comparison between two zero-extended values.
2293 Value &= Mask;
2294 } else if (NumBits == 8) {
2295 // Try to treat the comparison as unsigned, so that we can use CLI.
2296 // Adjust CCMask and Value as necessary.
2297 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2298 // Test whether the high bit of the byte is set.
2299 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2300 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2301 // Test whether the high bit of the byte is clear.
2302 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2303 else
2304 // No instruction exists for this combination.
2305 return;
2306 C.ICmpType = SystemZICMP::UnsignedOnly;
2307 }
2308 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2309 if (Value > Mask)
2310 return;
2311 // If the constant is in range, we can use any comparison.
2312 C.ICmpType = SystemZICMP::Any;
2313 } else
2314 return;
2315
2316 // Make sure that the first operand is an i32 of the right extension type.
2317 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2318 ISD::SEXTLOAD :
2319 ISD::ZEXTLOAD);
2320 if (C.Op0.getValueType() != MVT::i32 ||
2321 Load->getExtensionType() != ExtType) {
2322 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2323 Load->getBasePtr(), Load->getPointerInfo(),
2324 Load->getMemoryVT(), Load->getAlign(),
2325 Load->getMemOperand()->getFlags());
2326 // Update the chain uses.
2327 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2328 }
2329
2330 // Make sure that the second operand is an i32 with the right value.
2331 if (C.Op1.getValueType() != MVT::i32 ||
2332 Value != ConstOp1->getZExtValue())
2333 C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2334}
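// Worked example: for a sign-extending i8 load tested with "x < 0"
// (Value == 0, CCMask == SystemZ::CCMASK_CMP_LT), the code above switches
// to an unsigned "x > 127" test (CCMASK_CMP_GT) so that CLI can check the
// sign bit of the byte directly.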
2335
2336// Return true if Op is either an unextended load, or a load suitable
2337// for integer register-memory comparisons of type ICmpType.
2338static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2339 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2340 if (Load) {
2341 // There are no instructions to compare a register with a memory byte.
2342 if (Load->getMemoryVT() == MVT::i8)
2343 return false;
2344 // Otherwise decide on extension type.
2345 switch (Load->getExtensionType()) {
2346 case ISD::NON_EXTLOAD:
2347 return true;
2348 case ISD::SEXTLOAD:
2349 return ICmpType != SystemZICMP::UnsignedOnly;
2350 case ISD::ZEXTLOAD:
2351 return ICmpType != SystemZICMP::SignedOnly;
2352 default:
2353 break;
2354 }
2355 }
2356 return false;
2357}
2358
2359// Return true if it is better to swap the operands of C.
2360static bool shouldSwapCmpOperands(const Comparison &C) {
2361 // Leave f128 comparisons alone, since they have no memory forms.
2362 if (C.Op0.getValueType() == MVT::f128)
2363 return false;
2364
2365 // Always keep a floating-point constant second, since comparisons with
2366 // zero can use LOAD TEST and comparisons with other constants make a
2367 // natural memory operand.
2368 if (isa<ConstantFPSDNode>(C.Op1))
2369 return false;
2370
2371 // Never swap comparisons with zero since there are many ways to optimize
2372 // those later.
2373 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2374 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2375 return false;
2376
2377 // Also keep natural memory operands second if the loaded value is
2378 // only used here. Several comparisons have memory forms.
2379 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2380 return false;
2381
2382 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2383 // In that case we generally prefer the memory to be second.
2384 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2385 // The only exceptions are when the second operand is a constant and
2386 // we can use things like CHHSI.
2387 if (!ConstOp1)
2388 return true;
2389 // The unsigned memory-immediate instructions can handle 16-bit
2390 // unsigned integers.
2391 if (C.ICmpType != SystemZICMP::SignedOnly &&
2392 isUInt<16>(ConstOp1->getZExtValue()))
2393 return false;
2394 // The signed memory-immediate instructions can handle 16-bit
2395 // signed integers.
2396 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2397 isInt<16>(ConstOp1->getSExtValue()))
2398 return false;
2399 return true;
2400 }
2401
2402 // Try to promote the use of CGFR and CLGFR.
2403 unsigned Opcode0 = C.Op0.getOpcode();
2404 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2405 return true;
2406 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2407 return true;
2408 if (C.ICmpType != SystemZICMP::SignedOnly &&
2409 Opcode0 == ISD::AND &&
2410 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2411 cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
2412 return true;
2413
2414 return false;
2415}
2416
2417// Check whether C tests for equality between X and Y and whether X - Y
2418// or Y - X is also computed. In that case it's better to compare the
2419// result of the subtraction against zero.
2420static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2421 Comparison &C) {
2422 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2423 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2424 for (SDNode *N : C.Op0->uses()) {
2425 if (N->getOpcode() == ISD::SUB &&
2426 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2427 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2428 C.Op0 = SDValue(N, 0);
2429 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2430 return;
2431 }
2432 }
2433 }
2434}
2435
2436// Check whether C compares a floating-point value with zero and if that
2437// floating-point value is also negated. In this case we can use the
2438// negation to set CC, so avoiding separate LOAD AND TEST and
2439// LOAD (NEGATIVE/COMPLEMENT) instructions.
2440static void adjustForFNeg(Comparison &C) {
2441 // This optimization is invalid for strict comparisons, since FNEG
2442 // does not raise any exceptions.
2443 if (C.Chain)
2444 return;
2445 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2446 if (C1 && C1->isZero()) {
2447 for (SDNode *N : C.Op0->uses()) {
2448 if (N->getOpcode() == ISD::FNEG) {
2449 C.Op0 = SDValue(N, 0);
2450 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2451 return;
2452 }
2453 }
2454 }
2455}
2456
2457// Check whether C compares (shl X, 32) with 0 and whether X is
2458// also sign-extended. In that case it is better to test the result
2459// of the sign extension using LTGFR.
2460//
2461// This case is important because InstCombine transforms a comparison
2462// with (sext (trunc X)) into a comparison with (shl X, 32).
2463static void adjustForLTGFR(Comparison &C) {
2464 // Check for a comparison between (shl X, 32) and 0.
2465 if (C.Op0.getOpcode() == ISD::SHL &&
2466 C.Op0.getValueType() == MVT::i64 &&
2467 C.Op1.getOpcode() == ISD::Constant &&
2468 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2469 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2470 if (C1 && C1->getZExtValue() == 32) {
2471 SDValue ShlOp0 = C.Op0.getOperand(0);
2472 // See whether X has any SIGN_EXTEND_INREG uses.
2473 for (SDNode *N : ShlOp0->uses()) {
2474 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2475 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2476 C.Op0 = SDValue(N, 0);
2477 return;
2478 }
2479 }
2480 }
2481 }
2482}
2483
2484// If C compares the truncation of an extending load, try to compare
2485// the untruncated value instead. This exposes more opportunities to
2486// reuse CC.
2487static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2488 Comparison &C) {
2489 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2490 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2491 C.Op1.getOpcode() == ISD::Constant &&
2492 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2493 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2494 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
2495 C.Op0.getValueSizeInBits().getFixedValue()) {
2496 unsigned Type = L->getExtensionType();
2497 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2498 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2499 C.Op0 = C.Op0.getOperand(0);
2500 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2501 }
2502 }
2503 }
2504}
2505
2506// Return true if shift operation N has an in-range constant shift value.
2507// Store it in ShiftVal if so.
2508static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2509 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2510 if (!Shift)
2511 return false;
2512
2513 uint64_t Amount = Shift->getZExtValue();
2514 if (Amount >= N.getValueSizeInBits())
2515 return false;
2516
2517 ShiftVal = Amount;
2518 return true;
2519}
2520
2521// Check whether an AND with Mask is suitable for a TEST UNDER MASK
2522// instruction and whether the CC value is descriptive enough to handle
2523// a comparison of type Opcode between the AND result and CmpVal.
2524// CCMask says which comparison result is being tested and BitSize is
2525// the number of bits in the operands. If TEST UNDER MASK can be used,
2526// return the corresponding CC mask, otherwise return 0.
2527static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2528 uint64_t Mask, uint64_t CmpVal,
2529 unsigned ICmpType) {
2530 assert(Mask != 0 && "ANDs with zero should have been removed by now");
2531
2532 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2533 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2534 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2535 return 0;
2536
2537 // Work out the masks for the lowest and highest bits.
2538 unsigned HighShift = 63 - countLeadingZeros(Mask);
2539 uint64_t High = uint64_t(1) << HighShift;
2540 uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2541
2542 // Signed ordered comparisons are effectively unsigned if the sign
2543 // bit is dropped.
2544 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2545
2546 // Check for equality comparisons with 0, or the equivalent.
2547 if (CmpVal == 0) {
2548 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2549 return SystemZ::CCMASK_TM_ALL_0;
2550 if (CCMask == SystemZ::CCMASK_CMP_NE)
2551 return SystemZ::CCMASK_TM_SOME_1;
2552 }
2553 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2554 if (CCMask == SystemZ::CCMASK_CMP_LT)
2555 return SystemZ::CCMASK_TM_ALL_0;
2556 if (CCMask == SystemZ::CCMASK_CMP_GE)
2557 return SystemZ::CCMASK_TM_SOME_1;
2558 }
2559 if (EffectivelyUnsigned && CmpVal < Low) {
2560 if (CCMask == SystemZ::CCMASK_CMP_LE)
2561 return SystemZ::CCMASK_TM_ALL_0;
2562 if (CCMask == SystemZ::CCMASK_CMP_GT)
2563 return SystemZ::CCMASK_TM_SOME_1;
2564 }
2565
2566 // Check for equality comparisons with the mask, or the equivalent.
2567 if (CmpVal == Mask) {
2568 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2569 return SystemZ::CCMASK_TM_ALL_1;
2570 if (CCMask == SystemZ::CCMASK_CMP_NE)
2571 return SystemZ::CCMASK_TM_SOME_0;
2572 }
2573 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2574 if (CCMask == SystemZ::CCMASK_CMP_GT)
2575 return SystemZ::CCMASK_TM_ALL_1;
2576 if (CCMask == SystemZ::CCMASK_CMP_LE)
2577 return SystemZ::CCMASK_TM_SOME_0;
2578 }
2579 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2580 if (CCMask == SystemZ::CCMASK_CMP_GE)
2581 return SystemZ::CCMASK_TM_ALL_1;
2582 if (CCMask == SystemZ::CCMASK_CMP_LT)
2583 return SystemZ::CCMASK_TM_SOME_0;
2584 }
2585
2586 // Check for ordered comparisons with the top bit.
2587 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2588 if (CCMask == SystemZ::CCMASK_CMP_LE)
2589 return SystemZ::CCMASK_TM_MSB_0;
2590 if (CCMask == SystemZ::CCMASK_CMP_GT)
2591 return SystemZ::CCMASK_TM_MSB_1;
2592 }
2593 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2594 if (CCMask == SystemZ::CCMASK_CMP_LT)
2595 return SystemZ::CCMASK_TM_MSB_0;
2596 if (CCMask == SystemZ::CCMASK_CMP_GE)
2597 return SystemZ::CCMASK_TM_MSB_1;
2598 }
2599
2600 // If there are just two bits, we can do equality checks for Low and High
2601 // as well.
2602 if (Mask == Low + High) {
2603 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2604 return SystemZ::CCMASK_TM_MIXED_MSB_0;
2605 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2606 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2607 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2608 return SystemZ::CCMASK_TM_MIXED_MSB_1;
2609 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2610 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2611 }
2612
2613 // Looks like we've exhausted our options.
2614 return 0;
2615}
2616
2617// See whether C can be implemented as a TEST UNDER MASK instruction.
2618// Update the arguments with the TM version if so.
2619static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2620 Comparison &C) {
2621 // Check that we have a comparison with a constant.
2622 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2623 if (!ConstOp1)
2624 return;
2625 uint64_t CmpVal = ConstOp1->getZExtValue();
2626
2627 // Check whether the nonconstant input is an AND with a constant mask.
2628 Comparison NewC(C);
2629 uint64_t MaskVal;
2630 ConstantSDNode *Mask = nullptr;
2631 if (C.Op0.getOpcode() == ISD::AND) {
2632 NewC.Op0 = C.Op0.getOperand(0);
2633 NewC.Op1 = C.Op0.getOperand(1);
2634 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2635 if (!Mask)
2636 return;
2637 MaskVal = Mask->getZExtValue();
2638 } else {
2639 // There is no instruction to compare with a 64-bit immediate
2640 // so use TMHH instead if possible. We need an unsigned ordered
2641 // comparison with an i64 immediate.
2642 if (NewC.Op0.getValueType() != MVT::i64 ||
2643 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2644 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2645 NewC.ICmpType == SystemZICMP::SignedOnly)
2646 return;
2647 // Convert LE and GT comparisons into LT and GE.
2648 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2649 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2650 if (CmpVal == uint64_t(-1))
2651 return;
2652 CmpVal += 1;
2653 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2654 }
2655 // If the low N bits of Op1 are zero then the low N bits of Op0 can
2656 // be masked off without changing the result.
2657 MaskVal = -(CmpVal & -CmpVal);
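// Illustrative note (editor addition): CmpVal & -CmpVal isolates the lowest
// set bit, so for CmpVal = 0x100 this gives MaskVal = -0x100 =
// 0xffffffffffffff00, which masks off the low 8 bits of Op0.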
2658 NewC.ICmpType = SystemZICMP::UnsignedOnly;
2659 }
2660 if (!MaskVal)
2661 return;
2662
2663 // Check whether the combination of mask, comparison value and comparison
2664 // type is suitable.
2665 unsigned BitSize = NewC.Op0.getValueSizeInBits();
2666 unsigned NewCCMask, ShiftVal;
2667 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2668 NewC.Op0.getOpcode() == ISD::SHL &&
2669 isSimpleShift(NewC.Op0, ShiftVal) &&
2670 (MaskVal >> ShiftVal != 0) &&
2671 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2672 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2673 MaskVal >> ShiftVal,
2674 CmpVal >> ShiftVal,
2675 SystemZICMP::Any))) {
2676 NewC.Op0 = NewC.Op0.getOperand(0);
2677 MaskVal >>= ShiftVal;
2678 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2679 NewC.Op0.getOpcode() == ISD::SRL &&
2680 isSimpleShift(NewC.Op0, ShiftVal) &&
2681 (MaskVal << ShiftVal != 0) &&
2682 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2683 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2684 MaskVal << ShiftVal,
2685 CmpVal << ShiftVal,
2686 SystemZICMP::UnsignedOnly))) {
2687 NewC.Op0 = NewC.Op0.getOperand(0);
2688 MaskVal <<= ShiftVal;
2689 } else {
2690 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2691 NewC.ICmpType);
2692 if (!NewCCMask)
2693 return;
2694 }
2695
2696 // Go ahead and make the change.
2697 C.Opcode = SystemZISD::TM;
2698 C.Op0 = NewC.Op0;
2699 if (Mask && Mask->getZExtValue() == MaskVal)
2700 C.Op1 = SDValue(Mask, 0);
2701 else
2702 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2703 C.CCValid = SystemZ::CCMASK_TM;
2704 C.CCMask = NewCCMask;
2705}
2706
2707// See whether the comparison argument contains a redundant AND
2708// and remove it if so. This sometimes happens due to the generic
2709// BRCOND expansion.
2710static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2711 Comparison &C) {
2712 if (C.Op0.getOpcode() != ISD::AND)
2713 return;
2714 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2715 if (!Mask)
2716 return;
2717 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
2718 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2719 return;
2720
2721 C.Op0 = C.Op0.getOperand(0);
2722}
2723
2724// Return a Comparison that tests the condition-code result of intrinsic
2725// node Call against constant integer CC using comparison code Cond.
2726// Opcode is the opcode of the SystemZISD operation for the intrinsic
2727// and CCValid is the set of possible condition-code results.
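// Illustrative note (editor addition): the four-bit mask uses bit 3 for
// CC==0 down to bit 0 for CC==3, so a SETEQ test against CC==1 produces
// CCMask = 1 << (3 - 1) = 0b0100 before it is ANDed with CCValid.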
2728static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2729 SDValue Call, unsigned CCValid, uint64_t CC,
2730 ISD::CondCode Cond) {
2731 Comparison C(Call, SDValue(), SDValue());
2732 C.Opcode = Opcode;
2733 C.CCValid = CCValid;
2734 if (Cond == ISD::SETEQ)
2735 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2736 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2737 else if (Cond == ISD::SETNE)
2738 // ...and the inverse of that.
2739 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2740 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2741 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2742 // always true for CC>3.
2743 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2744 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2745 // ...and the inverse of that.
2746 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2747 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2748 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2749 // always true for CC>3.
2750 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2751 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2752 // ...and the inverse of that.
2753 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2754 else
2755 llvm_unreachable("Unexpected integer comparison type");
2756 C.CCMask &= CCValid;
2757 return C;
2758}
2759
2760 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2761static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2762 ISD::CondCode Cond, const SDLoc &DL,
2763 SDValue Chain = SDValue(),
2764 bool IsSignaling = false) {
2765 if (CmpOp1.getOpcode() == ISD::Constant) {
2766 assert(!Chain);
2767 uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2768 unsigned Opcode, CCValid;
2769 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2770 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2771 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2772 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2773 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2774 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2775 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2776 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2777 }
2778 Comparison C(CmpOp0, CmpOp1, Chain);
2779 C.CCMask = CCMaskForCondCode(Cond);
2780 if (C.Op0.getValueType().isFloatingPoint()) {
2781 C.CCValid = SystemZ::CCMASK_FCMP;
2782 if (!C.Chain)
2783 C.Opcode = SystemZISD::FCMP;
2784 else if (!IsSignaling)
2785 C.Opcode = SystemZISD::STRICT_FCMP;
2786 else
2787 C.Opcode = SystemZISD::STRICT_FCMPS;
2788 adjustForFNeg(C);
2789 } else {
2790 assert(!C.Chain);
2791 C.CCValid = SystemZ::CCMASK_ICMP;
2792 C.Opcode = SystemZISD::ICMP;
2793 // Choose the type of comparison. Equality and inequality tests can
2794 // use either signed or unsigned comparisons. The choice also doesn't
2795 // matter if both sign bits are known to be clear. In those cases we
2796 // want to give the main isel code the freedom to choose whichever
2797 // form fits best.
2798 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2799 C.CCMask == SystemZ::CCMASK_CMP_NE ||
2800 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2801 C.ICmpType = SystemZICMP::Any;
2802 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2803 C.ICmpType = SystemZICMP::UnsignedOnly;
2804 else
2805 C.ICmpType = SystemZICMP::SignedOnly;
2806 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2807 adjustForRedundantAnd(DAG, DL, C);
2808 adjustZeroCmp(DAG, DL, C);
2809 adjustSubwordCmp(DAG, DL, C);
2810 adjustForSubtraction(DAG, DL, C);
2811 adjustForLTGFR(C);
2812 adjustICmpTruncate(DAG, DL, C);
2813 }
2814
2815 if (shouldSwapCmpOperands(C)) {
2816 std::swap(C.Op0, C.Op1);
2817 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2818 }
2819
2820 adjustForTestUnderMask(DAG, DL, C);
2821 return C;
2822}
2823
2824// Emit the comparison instruction described by C.
2825static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2826 if (!C.Op1.getNode()) {
2827 SDNode *Node;
2828 switch (C.Op0.getOpcode()) {
2829 case ISD::INTRINSIC_W_CHAIN:
2830 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2831 return SDValue(Node, 0);
2832 case ISD::INTRINSIC_WO_CHAIN:
2833 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2834 return SDValue(Node, Node->getNumValues() - 1);
2835 default:
2836 llvm_unreachable("Invalid comparison operands");
2837 }
2838 }
2839 if (C.Opcode == SystemZISD::ICMP)
2840 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
2841 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
2842 if (C.Opcode == SystemZISD::TM) {
2843 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2844 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2845 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
2846 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
2847 }
2848 if (C.Chain) {
2849 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
2850 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
2851 }
2852 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
2853}
2854
2855// Implement a 32-bit *MUL_LOHI operation by extending both operands to
2856// 64 bits. Extend is the extension type to use. Store the high part
2857// in Hi and the low part in Lo.
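// Illustrative note (editor addition): with ISD::ZERO_EXTEND, Op0 = 0xffffffff
// and Op1 = 2, the 64-bit product is 0x1fffffffe, so Hi becomes 1 and Lo
// becomes 0xfffffffe after truncation.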
2858static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2859 SDValue Op0, SDValue Op1, SDValue &Hi,
2860 SDValue &Lo) {
2861 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2862 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2863 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2864 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2865 DAG.getConstant(32, DL, MVT::i64));
2866 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2867 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2868}
2869
2870// Lower a binary operation that produces two VT results, one in each
2871// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2872// and Opcode performs the GR128 operation. Store the even register result
2873// in Even and the odd register result in Odd.
2874static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2875 unsigned Opcode, SDValue Op0, SDValue Op1,
2876 SDValue &Even, SDValue &Odd) {
2877 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
2878 bool Is32Bit = is32Bit(VT);
2879 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2880 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2881}
2882
2883// Return an i32 value that is 1 if the CC value produced by CCReg is
2884// in the mask CCMask and 0 otherwise. CC is known to have a value
2885// in CCValid, so other values can be ignored.
2886static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
2887 unsigned CCValid, unsigned CCMask) {
2888 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
2889 DAG.getConstant(0, DL, MVT::i32),
2890 DAG.getTargetConstant(CCValid, DL, MVT::i32),
2891 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
2892 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
2893}
2894
2895 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2896// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
2897// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
2898// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
2899// floating-point comparisons.
2900enum class CmpMode { Int, FP, StrictFP, SignalingFP };
2901static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
2902 switch (CC) {
2903 case ISD::SETOEQ:
2904 case ISD::SETEQ:
2905 switch (Mode) {
2906 case CmpMode::Int: return SystemZISD::VICMPE;
2907 case CmpMode::FP: return SystemZISD::VFCMPE;
2908 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
2909 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
2910 }
2911 llvm_unreachable("Bad mode");
2912
2913 case ISD::SETOGE:
2914 case ISD::SETGE:
2915 switch (Mode) {
2916 case CmpMode::Int: return 0;
2917 case CmpMode::FP: return SystemZISD::VFCMPHE;
2918 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
2919 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
2920 }
2921 llvm_unreachable("Bad mode");
2922
2923 case ISD::SETOGT:
2924 case ISD::SETGT:
2925 switch (Mode) {
2926 case CmpMode::Int: return SystemZISD::VICMPH;
2927 case CmpMode::FP: return SystemZISD::VFCMPH;
2928 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
2929 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
2930 }
2931 llvm_unreachable("Bad mode");
2932
2933 case ISD::SETUGT:
2934 switch (Mode) {
2935 case CmpMode::Int: return SystemZISD::VICMPHL;
2936 case CmpMode::FP: return 0;
2937 case CmpMode::StrictFP: return 0;
2938 case CmpMode::SignalingFP: return 0;
2939 }
2940 llvm_unreachable("Bad mode");
2941
2942 default:
2943 return 0;
2944 }
2945}
2946
2947// Return the SystemZISD vector comparison operation for CC or its inverse,
2948// or 0 if neither can be done directly. Indicate in Invert whether the
2949// result is for the inverse of CC. Mode is as above.
2950static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
2951 bool &Invert) {
2952 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2953 Invert = false;
2954 return Opcode;
2955 }
2956
2957 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
2958 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2959 Invert = true;
2960 return Opcode;
2961 }
2962
2963 return 0;
2964}
2965
2966// Return a v2f64 that contains the extended form of elements Start and Start+1
2967// of v4f32 value Op. If Chain is nonnull, return the strict form.
2968static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2969 SDValue Op, SDValue Chain) {
2970 int Mask[] = { Start, -1, Start + 1, -1 };
2971 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2972 if (Chain) {
2973 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
2974 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
2975 }
2976 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2977}
2978
2979// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2980// producing a result of type VT. If Chain is nonnull, return the strict form.
2981SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2982 const SDLoc &DL, EVT VT,
2983 SDValue CmpOp0,
2984 SDValue CmpOp1,
2985 SDValue Chain) const {
2986 // There is no hardware support for v4f32 (unless we have the vector
2987 // enhancements facility 1), so extend the vector into two v2f64s
2988 // and compare those.
2989 if (CmpOp0.getValueType() == MVT::v4f32 &&
2990 !Subtarget.hasVectorEnhancements1()) {
2991 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
2992 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
2993 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
2994 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
2995 if (Chain) {
2996 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
2997 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
2998 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
2999 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3000 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3001 H1.getValue(1), L1.getValue(1),
3002 HRes.getValue(1), LRes.getValue(1) };
3003 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3004 SDValue Ops[2] = { Res, NewChain };
3005 return DAG.getMergeValues(Ops, DL);
3006 }
3007 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3008 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3009 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3010 }
3011 if (Chain) {
3012 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3013 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3014 }
3015 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3016}
3017
3018// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3019// an integer mask of type VT. If Chain is nonnull, we have a strict
3020// floating-point comparison. If in addition IsSignaling is true, we have
3021// a strict signaling floating-point comparison.
3022SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3023 const SDLoc &DL, EVT VT,
3024 ISD::CondCode CC,
3025 SDValue CmpOp0,
3026 SDValue CmpOp1,
3027 SDValue Chain,
3028 bool IsSignaling) const {
3029 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3030 assert (!Chain || IsFP);
3031 assert (!IsSignaling || Chain);
3032 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3033 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3034 bool Invert = false;
3035 SDValue Cmp;
3036 switch (CC) {
3037 // Handle tests for order using (or (ogt y x) (oge x y)).
3038 case ISD::SETUO:
3039 Invert = true;
3040 [[fallthrough]];
3041 case ISD::SETO: {
3042 assert(IsFP && "Unexpected integer comparison");
3043 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3044 DL, VT, CmpOp1, CmpOp0, Chain);
3045 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3046 DL, VT, CmpOp0, CmpOp1, Chain);
3047 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3048 if (Chain)
3049 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3050 LT.getValue(1), GE.getValue(1));
3051 break;
3052 }
3053
3054 // Handle <> tests using (or (ogt y x) (ogt x y)).
3055 case ISD::SETUEQ:
3056 Invert = true;
3057 [[fallthrough]];
3058 case ISD::SETONE: {
3059 assert(IsFP && "Unexpected integer comparison");
3060 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3061 DL, VT, CmpOp1, CmpOp0, Chain);
3062 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3063 DL, VT, CmpOp0, CmpOp1, Chain);
3064 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3065 if (Chain)
3066 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3067 LT.getValue(1), GT.getValue(1));
3068 break;
3069 }
3070
3071 // Otherwise a single comparison is enough. It doesn't really
3072 // matter whether we try the inversion or the swap first, since
3073 // there are no cases where both work.
3074 default:
3075 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3076 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3077 else {
3078 CC = ISD::getSetCCSwappedOperands(CC);
3079 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3080 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3081 else
3082 llvm_unreachable("Unhandled comparison");
3083 }
3084 if (Chain)
3085 Chain = Cmp.getValue(1);
3086 break;
3087 }
3088 if (Invert) {
3089 SDValue Mask =
3090 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3091 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3092 }
3093 if (Chain && Chain.getNode() != Cmp.getNode()) {
3094 SDValue Ops[2] = { Cmp, Chain };
3095 Cmp = DAG.getMergeValues(Ops, DL);
3096 }
3097 return Cmp;
3098}
3099
3100SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3101 SelectionDAG &DAG) const {
3102 SDValue CmpOp0 = Op.getOperand(0);
3103 SDValue CmpOp1 = Op.getOperand(1);
3104 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3105 SDLoc DL(Op);
3106 EVT VT = Op.getValueType();
3107 if (VT.isVector())
3108 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3109
3110 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3111 SDValue CCReg = emitCmp(DAG, DL, C);
3112 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3113}
3114
3115SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3116 SelectionDAG &DAG,
3117 bool IsSignaling) const {
3118 SDValue Chain = Op.getOperand(0);
3119 SDValue CmpOp0 = Op.getOperand(1);
3120 SDValue CmpOp1 = Op.getOperand(2);
3121 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3122 SDLoc DL(Op);
3123 EVT VT = Op.getNode()->getValueType(0);
3124 if (VT.isVector()) {
3125 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3126 Chain, IsSignaling);
3127 return Res.getValue(Op.getResNo());
3128 }
3129
3130 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3131 SDValue CCReg = emitCmp(DAG, DL, C);
3132 CCReg->setFlags(Op->getFlags());
3133 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3134 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3135 return DAG.getMergeValues(Ops, DL);
3136}
3137
3138SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3139 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3140 SDValue CmpOp0 = Op.getOperand(2);
3141 SDValue CmpOp1 = Op.getOperand(3);
3142 SDValue Dest = Op.getOperand(4);
3143 SDLoc DL(Op);
3144
3145 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3146 SDValue CCReg = emitCmp(DAG, DL, C);
3147 return DAG.getNode(
3148 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3149 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3150 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3151}
3152
3153// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3154// allowing Pos and Neg to be wider than CmpOp.
3155static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3156 return (Neg.getOpcode() == ISD::SUB &&
3157 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3158 cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
3159 Neg.getOperand(1) == Pos &&
3160 (Pos == CmpOp ||
3161 (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3162 Pos.getOperand(0) == CmpOp)));
3163}
3164
3165// Return the absolute or negative absolute of Op; IsNegative decides which.
3166static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3167 bool IsNegative) {
3168 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3169 if (IsNegative)
3170 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3171 DAG.getConstant(0, DL, Op.getValueType()), Op);
3172 return Op;
3173}
3174
3175SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3176 SelectionDAG &DAG) const {
3177 SDValue CmpOp0 = Op.getOperand(0);
3178 SDValue CmpOp1 = Op.getOperand(1);
3179 SDValue TrueOp = Op.getOperand(2);
3180 SDValue FalseOp = Op.getOperand(3);
3181 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3182 SDLoc DL(Op);
3183
3184 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3185
3186 // Check for absolute and negative-absolute selections, including those
3187 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3188 // This check supplements the one in DAGCombiner.
3189 if (C.Opcode == SystemZISD::ICMP &&
3190 C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3191 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3192 C.Op1.getOpcode() == ISD::Constant &&
3193 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
3194 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3195 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3196 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3197 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3198 }
3199
3200 SDValue CCReg = emitCmp(DAG, DL, C);
3201 SDValue Ops[] = {TrueOp, FalseOp,
3202 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3203 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3204
3205 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3206}
3207
3208SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3209 SelectionDAG &DAG) const {
3210 SDLoc DL(Node);
3211 const GlobalValue *GV = Node->getGlobal();
3212 int64_t Offset = Node->getOffset();
3213 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3214 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3215
3216 SDValue Result;
3217 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3218 if (isInt<32>(Offset)) {
3219 // Assign anchors at 1<<12 byte boundaries.
3220 uint64_t Anchor = Offset & ~uint64_t(0xfff);
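// Illustrative note (editor addition): for Offset = 0x1234 the anchor is
// 0x1000; the remaining 0x234 is halfword-aligned, so it is folded into the
// address by the PCREL_OFFSET node below.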
3221 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3222 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3223
3224 // The offset can be folded into the address if it is aligned to a
3225 // halfword.
3226 Offset -= Anchor;
3227 if (Offset != 0 && (Offset & 1) == 0) {
3228 SDValue Full =
3229 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3230 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3231 Offset = 0;
3232 }
3233 } else {
3234 // Conservatively load a constant offset greater than 32 bits into a
3235 // register below.
3236 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3237 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3238 }
3239 } else {
3240 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3241 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3242 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3243 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3244 }
3245
3246 // If there was a non-zero offset that we didn't fold, create an explicit
3247 // addition for it.
3248 if (Offset != 0)
3249 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3250 DAG.getConstant(Offset, DL, PtrVT));
3251
3252 return Result;
3253}
3254
3255SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3256 SelectionDAG &DAG,
3257 unsigned Opcode,
3258 SDValue GOTOffset) const {
3259 SDLoc DL(Node);
3260 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3261 SDValue Chain = DAG.getEntryNode();
3262 SDValue Glue;
3263
3264 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3265 CallingConv::GHC)
3266 report_fatal_error("In GHC calling convention TLS is not supported");
3267
3268 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3269 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3270 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3271 Glue = Chain.getValue(1);
3272 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3273 Glue = Chain.getValue(1);
3274
3275 // The first call operand is the chain and the second is the TLS symbol.
3276 SmallVector<SDValue, 8> Ops;
3277 Ops.push_back(Chain);
3278 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3279 Node->getValueType(0),
3280 0, 0));
3281
3282 // Add argument registers to the end of the list so that they are
3283 // known live into the call.
3284 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3285 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3286
3287 // Add a register mask operand representing the call-preserved registers.
3288 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3289 const uint32_t *Mask =
3290 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3291 assert(Mask && "Missing call preserved mask for calling convention");
3292 Ops.push_back(DAG.getRegisterMask(Mask));
3293
3294 // Glue the call to the argument copies.
3295 Ops.push_back(Glue);
3296
3297 // Emit the call.
3298 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3299 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3300 Glue = Chain.getValue(1);
3301
3302 // Copy the return value from %r2.
3303 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3304}
3305
3306SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3307 SelectionDAG &DAG) const {
3308 SDValue Chain = DAG.getEntryNode();
3309 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3310
3311 // The high part of the thread pointer is in access register 0.
3312 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3313 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3314
3315 // The low part of the thread pointer is in access register 1.
3316 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3317 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3318
3319 // Merge them into a single 64-bit address.
3320 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3321 DAG.getConstant(32, DL, PtrVT));
3322 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3323}
3324
3325SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3326 SelectionDAG &DAG) const {
3327 if (DAG.getTarget().useEmulatedTLS())
3328 return LowerToTLSEmulatedModel(Node, DAG);
3329 SDLoc DL(Node);
3330 const GlobalValue *GV = Node->getGlobal();
3331 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3332 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3333
3334 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3335 CallingConv::GHC)
3336 report_fatal_error("In GHC calling convention TLS is not supported");
3337
3338 SDValue TP = lowerThreadPointer(DL, DAG);
3339
3340 // Get the offset of GA from the thread pointer, based on the TLS model.
3341 SDValue Offset;
3342 switch (model) {
3343 case TLSModel::GeneralDynamic: {
3344 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3345 SystemZConstantPoolValue *CPV =
3346 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3347
3348 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3349 Offset = DAG.getLoad(
3350 PtrVT, DL, DAG.getEntryNode(), Offset,
3351 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3352
3353 // Call __tls_get_offset to retrieve the offset.
3354 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3355 break;
3356 }
3357
3358 case TLSModel::LocalDynamic: {
3359 // Load the GOT offset of the module ID.
3360 SystemZConstantPoolValue *CPV =
3361 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3362
3363 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3364 Offset = DAG.getLoad(
3365 PtrVT, DL, DAG.getEntryNode(), Offset,
3366 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3367
3368 // Call __tls_get_offset to retrieve the module base offset.
3369 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3370
3371 // Note: The SystemZLDCleanupPass will remove redundant computations
3372 // of the module base offset. Count the total number of local-dynamic
3373 // accesses to trigger execution of that pass.
3374 SystemZMachineFunctionInfo* MFI =
3375 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3376 MFI->incNumLocalDynamicTLSAccesses();
3377
3378 // Add the per-symbol offset.
3379 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3380
3381 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3382 DTPOffset = DAG.getLoad(
3383 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3384 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3385
3386 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3387 break;
3388 }
3389
3390 case TLSModel::InitialExec: {
3391 // Load the offset from the GOT.
3392 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3393 SystemZII::MO_INDNTPOFF);
3394 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3395 Offset =
3396 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3397 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3398 break;
3399 }
3400
3401 case TLSModel::LocalExec: {
3402 // Force the offset into the constant pool and load it from there.
3403 SystemZConstantPoolValue *CPV =
3404 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3405
3406 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3407 Offset = DAG.getLoad(
3408 PtrVT, DL, DAG.getEntryNode(), Offset,
3409 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3410 break;
3411 }
3412 }
3413
3414 // Add the base and offset together.
3415 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3416}
3417
3418SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3419 SelectionDAG &DAG) const {
3420 SDLoc DL(Node);
3421 const BlockAddress *BA = Node->getBlockAddress();
3422 int64_t Offset = Node->getOffset();
3423 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3424
3425 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3426 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3427 return Result;
3428}
3429
3430SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3431 SelectionDAG &DAG) const {
3432 SDLoc DL(JT);
3433 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3434 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3435
3436 // Use LARL to load the address of the table.
3437 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3438}
3439
3440SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3441 SelectionDAG &DAG) const {
3442 SDLoc DL(CP);
3443 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3444
3445 SDValue Result;
3446 if (CP->isMachineConstantPoolEntry())
3447 Result =
3448 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3449 else
3450 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3451 CP->getOffset());
3452
3453 // Use LARL to load the address of the constant pool entry.
3454 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3455}
3456
3457SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3458 SelectionDAG &DAG) const {
3459 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
3460 MachineFunction &MF = DAG.getMachineFunction();
3461 MachineFrameInfo &MFI = MF.getFrameInfo();
3462 MFI.setFrameAddressIsTaken(true);
3463
3464 SDLoc DL(Op);
3465 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3466 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3467
3468 // By definition, the frame address is the address of the back chain. In the
3469 // case of a packed stack without a backchain, return the address where the
3470 // backchain would have been stored; this will either be unused space or
3471 // contain a saved register.
3472 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3473 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3474
3475 // FIXME The frontend should detect this case.
3476 if (Depth > 0) {
3477 report_fatal_error("Unsupported stack frame traversal count");
3478 }
3479
3480 return BackChain;
3481}
3482
3483SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3484 SelectionDAG &DAG) const {
3485 MachineFunction &MF = DAG.getMachineFunction();
3486 MachineFrameInfo &MFI = MF.getFrameInfo();
3487 MFI.setReturnAddressIsTaken(true);
3488
3489 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3490 return SDValue();
3491
3492 SDLoc DL(Op);
3493 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3494 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3495
3496 // FIXME The frontend should detect this case.
3497 if (Depth > 0) {
3498 report_fatal_error("Unsupported stack frame traversal count");
3499 }
3500
3501 // Return R14D, which has the return address. Mark it an implicit live-in.
3502 Register LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3503 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3504}
3505
3506SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3507 SelectionDAG &DAG) const {
3508 SDLoc DL(Op);
3509 SDValue In = Op.getOperand(0);
3510 EVT InVT = In.getValueType();
3511 EVT ResVT = Op.getValueType();
3512
3513 // Convert loads directly. This is normally done by DAGCombiner,
3514 // but we need this case for bitcasts that are created during lowering
3515 // and which are then lowered themselves.
3516 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3517 if (ISD::isNormalLoad(LoadN)) {
3518 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3519 LoadN->getBasePtr(), LoadN->getMemOperand());
3520 // Update the chain uses.
3521 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3522 return NewLoad;
3523 }
3524
3525 if (InVT == MVT::i32 && ResVT == MVT::f32) {
3526 SDValue In64;
3527 if (Subtarget.hasHighWord()) {
3528 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3529 MVT::i64);
3530 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3531 MVT::i64, SDValue(U64, 0), In);
3532 } else {
3533 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3534 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3535 DAG.getConstant(32, DL, MVT::i64));
3536 }
3537 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3538 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3539 DL, MVT::f32, Out64);
3540 }
3541 if (InVT == MVT::f32 && ResVT == MVT::i32) {
3542 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3543 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3544 MVT::f64, SDValue(U64, 0), In);
3545 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3546 if (Subtarget.hasHighWord())
3547 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3548 MVT::i32, Out64);
3549 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3550 DAG.getConstant(32, DL, MVT::i64));
3551 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3552 }
3553 llvm_unreachable("Unexpected bitcast combination");
3554}
3555
3556SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3557 SelectionDAG &DAG) const {
3558
3559 if (Subtarget.isTargetXPLINK64())
3560 return lowerVASTART_XPLINK(Op, DAG);
3561 else
3562 return lowerVASTART_ELF(Op, DAG);
3563}
3564
3565SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
3566 SelectionDAG &DAG) const {
3567 MachineFunction &MF = DAG.getMachineFunction();
3568 SystemZMachineFunctionInfo *FuncInfo =
3569 MF.getInfo<SystemZMachineFunctionInfo>();
3570
3571 SDLoc DL(Op);
3572
3573 // vastart just stores the address of the VarArgsFrameIndex slot into the
3574 // memory location argument.
3575 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3576 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3577 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3578 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
3579 MachinePointerInfo(SV));
3580}
3581
3582SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
3583 SelectionDAG &DAG) const {
3584 MachineFunction &MF = DAG.getMachineFunction();
3585 SystemZMachineFunctionInfo *FuncInfo =
3586 MF.getInfo<SystemZMachineFunctionInfo>();
3587 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3588
3589 SDValue Chain = Op.getOperand(0);
3590 SDValue Addr = Op.getOperand(1);
3591 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3592 SDLoc DL(Op);
3593
3594 // The initial values of each field.
3595 const unsigned NumFields = 4;
3596 SDValue Fields[NumFields] = {
3597 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3598 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3599 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3600 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3601 };
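// Illustrative note (editor addition): these appear to correspond to the
// four 8-byte fields of the ELF s390x va_list (__gpr, __fpr,
// __overflow_arg_area and __reg_save_area), stored 8 bytes apart by the
// loop below.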
3602
3603 // Store each field into its respective slot.
3604 SDValue MemOps[NumFields];
3605 unsigned Offset = 0;
3606 for (unsigned I = 0; I < NumFields; ++I) {
3607 SDValue FieldAddr = Addr;
3608 if (Offset != 0)
3609 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3610 DAG.getIntPtrConstant(Offset, DL));
3611 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3612 MachinePointerInfo(SV, Offset));
3613 Offset += 8;
3614 }
3615 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3616}
3617
3618SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3619 SelectionDAG &DAG) const {
3620 SDValue Chain = Op.getOperand(0);
3621 SDValue DstPtr = Op.getOperand(1);
3622 SDValue SrcPtr = Op.getOperand(2);
3623 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3624 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3625 SDLoc DL(Op);
3626
3627 uint32_t Sz =
3628 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
3629 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
3630 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3631 /*isTailCall*/ false, MachinePointerInfo(DstSV),
3632 MachinePointerInfo(SrcSV));
3633}
3634
3635SDValue
3636SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
3637 SelectionDAG &DAG) const {
3638 if (Subtarget.isTargetXPLINK64())
3639 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
3640 else
3641 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
3642}
3643
3644SDValue
3645SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
3646 SelectionDAG &DAG) const {
3647 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3648 MachineFunction &MF = DAG.getMachineFunction();
3649 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3650 SDValue Chain = Op.getOperand(0);
3651 SDValue Size = Op.getOperand(1);
3652 SDValue Align = Op.getOperand(2);
3653 SDLoc DL(Op);
3654
3655 // If the user has set the no-realign-stack function attribute, ignore
3656 // alloca alignments.
3657 uint64_t AlignVal =
3658 (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3659
3660 uint64_t StackAlign = TFI->getStackAlignment();
3661 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3662 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3663
3664 SDValue NeededSpace = Size;
3665
3666 // Add extra space for alignment if needed.
3667 EVT PtrVT = getPointerTy(MF.getDataLayout());
3668 if (ExtraAlignSpace)
3669 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
3670 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3671
3672 bool IsSigned = false;
3673 bool DoesNotReturn = false;
3674 bool IsReturnValueUsed = false;
3675 EVT VT = Op.getValueType();
3676 SDValue AllocaCall =
3677 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
3678 CallingConv::C, IsSigned, DL, DoesNotReturn,
3679 IsReturnValueUsed)
3680 .first;
3681
3682 // Perform a CopyFromReg from %GPR4 (the stack pointer register), chained and
3683 // glued to the end of the call so that it is not broken up from the call
3684 // sequence.
3685 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
3686 Register SPReg = Regs.getStackPointerRegister();
3687 Chain = AllocaCall.getValue(1);
3688 SDValue Glue = AllocaCall.getValue(2);
3689 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
3690 Chain = NewSPRegNode.getValue(1);
3691
3692 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
3693 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
3694 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
3695
3696 // Dynamically realign if needed.
3697 if (ExtraAlignSpace) {
3698 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3699 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3700 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
3701 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
3702 }
3703
3704 SDValue Ops[2] = {Result, Chain};
3705 return DAG.getMergeValues(Ops, DL);
3706}
3707
3708SDValue
3709SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
3710 SelectionDAG &DAG) const {
3711 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3712 MachineFunction &MF = DAG.getMachineFunction();
3713 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3714 bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3715
3716 SDValue Chain = Op.getOperand(0);
3717 SDValue Size = Op.getOperand(1);
3718 SDValue Align = Op.getOperand(2);
3719 SDLoc DL(Op);
3720
3721 // If the user has set the no-realign-stack function attribute, ignore
3722 // alloca alignments.
3723 uint64_t AlignVal =
3724 (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3725
3726 uint64_t StackAlign = TFI->getStackAlignment();
3727 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3728 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3729
3730 Register SPReg = getStackPointerRegisterToSaveRestore();
3731 SDValue NeededSpace = Size;
3732
3733 // Get a reference to the stack pointer.
3734 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3735
3736 // If we need a backchain, save it now.
3737 SDValue Backchain;
3738 if (StoreBackchain)
3739 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
3740 MachinePointerInfo());
3741
3742 // Add extra space for alignment if needed.
3743 if (ExtraAlignSpace)
3744 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3745 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3746
3747 // Get the new stack pointer value.
3748 SDValue NewSP;
3749 if (hasInlineStackProbe(MF)) {
3750 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
3751 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
3752 Chain = NewSP.getValue(1);
3753 }
3754 else {
3755 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
3756 // Copy the new stack pointer back.
3757 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3758 }
3759
3760 // The allocated data lives above the 160 bytes allocated for the standard
3761 // frame, plus any outgoing stack arguments. We don't know how much that
3762 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3763 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3764 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3765
3766 // Dynamically realign if needed.
3767 if (RequiredAlign > StackAlign) {
3768 Result =
3769 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3770 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3771 Result =
3772 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3773 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3774 }
3775
3776 if (StoreBackchain)
3777 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
3778 MachinePointerInfo());
3779
3780 SDValue Ops[2] = { Result, Chain };
3781 return DAG.getMergeValues(Ops, DL);
3782}
3783
3784SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3785 SDValue Op, SelectionDAG &DAG) const {
3786 SDLoc DL(Op);
3787
3788 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3789}
3790
3791SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
3792 SelectionDAG &DAG) const {
3793 EVT VT = Op.getValueType();
3794 SDLoc DL(Op);
3795 SDValue Ops[2];
3796 if (is32Bit(VT))
3797 // Just do a normal 64-bit multiplication and extract the results.
3798 // We define this so that it can be used for constant division.
3799 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
3800 Op.getOperand(1), Ops[1], Ops[0]);
3801 else if (Subtarget.hasMiscellaneousExtensions2())
3802 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3803 // the high result in the even register. ISD::SMUL_LOHI is defined to
3804 // return the low half first, so the results are in reverse order.
3805 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
3806 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3807 else {
3808 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3809 //
3810 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3811 //
3812 // but using the fact that the upper halves are either all zeros
3813 // or all ones:
3814 //
3815 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3816 //
3817 // and grouping the right terms together since they are quicker than the
3818 // multiplication:
3819 //
3820 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
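// Editor note (not part of the source): the rewrite works because lh and rh,
// produced by the arithmetic shift right by 63 below, are each either 0 or
// all ones (-1), so (lh << 64) * rl reduces to -((lh & rl) << 64), and
// similarly for (ll * rh) << 64.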
3821 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
3822 SDValue LL = Op.getOperand(0);
3823 SDValue RL = Op.getOperand(1);
3824 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
3825 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
3826 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3827 // the high result in the even register. ISD::SMUL_LOHI is defined to
3828 // return the low half first, so the results are in reverse order.
3829 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3830 LL, RL, Ops[1], Ops[0]);
3831 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
3832 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
3833 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
3834 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
3835 }
3836 return DAG.getMergeValues(Ops, DL);
3837}
3838
3839SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3840 SelectionDAG &DAG) const {
3841 EVT VT = Op.getValueType();
3842 SDLoc DL(Op);
3843 SDValue Ops[2];
3844 if (is32Bit(VT))
3845 // Just do a normal 64-bit multiplication and extract the results.
3846 // We define this so that it can be used for constant division.
3847 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3848 Op.getOperand(1), Ops[1], Ops[0]);
3849 else
3850 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3851 // the high result in the even register. ISD::UMUL_LOHI is defined to
3852 // return the low half first, so the results are in reverse order.
3853 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3854 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3855 return DAG.getMergeValues(Ops, DL);
3856}
3857
3858SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3859 SelectionDAG &DAG) const {
3860 SDValue Op0 = Op.getOperand(0);
3861 SDValue Op1 = Op.getOperand(1);
3862 EVT VT = Op.getValueType();
3863 SDLoc DL(Op);
3864
3865 // We use DSGF for 32-bit division. This means the first operand must
3866 // always be 64-bit, and the second operand should be 32-bit whenever
3867 // that is possible, to improve performance.
3868 if (is32Bit(VT))
3869 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3870 else if (DAG.ComputeNumSignBits(Op1) > 32)
3871 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3872
3873 // DSG(F) returns the remainder in the even register and the
3874 // quotient in the odd register.
3875 SDValue Ops[2];
3876 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
3877 return DAG.getMergeValues(Ops, DL);
3878}
3879
3880SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3881 SelectionDAG &DAG) const {
3882 EVT VT = Op.getValueType();
3883 SDLoc DL(Op);
3884
3885 // DL(G) returns the remainder in the even register and the
3886 // quotient in the odd register.
3887 SDValue Ops[2];
3888 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
3889 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3890 return DAG.getMergeValues(Ops, DL);
3891}
3892
3893SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3894 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3895
3896 // Get the known-zero masks for each operand.
3897 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
3898 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
3899 DAG.computeKnownBits(Ops[1])};
3900
3901 // See if the upper 32 bits of one operand and the lower 32 bits of the
3902 // other are known zero. They are the low and high operands respectively.
3903 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
3904 Known[1].Zero.getZExtValue() };
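// Illustrative note (editor addition): if the upper 32 bits of Ops[0] and
// the lower 32 bits of Ops[1] are known zero, then Masks[0] >> 32 and
// uint32_t(Masks[1]) are both 0xffffffff, so Ops[1] becomes the high
// operand and Ops[0] the low operand.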
3905 unsigned High, Low;
3906 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3907 High = 1, Low = 0;
3908 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3909 High = 0, Low = 1;
3910 else
3911 return Op;
3912
3913 SDValue LowOp = Ops[Low];
3914 SDValue HighOp = Ops[High];
3915
3916 // If the high part is a constant, we're better off using IILH.
3917 if (HighOp.getOpcode() == ISD::Constant)
3918 return Op;
3919
3920 // If the low part is a constant that is outside the range of LHI,
3921 // then we're better off using IILF.
3922 if (LowOp.getOpcode() == ISD::Constant) {
3923 int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3924 if (!isInt<16>(Value))
3925 return Op;
3926 }
3927
3928 // Check whether the high part is an AND that doesn't change the
3929 // high 32 bits and just masks out low bits. We can skip it if so.
3930 if (HighOp.getOpcode() == ISD::AND &&
3931 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3932 SDValue HighOp0 = HighOp.getOperand(0);
3933 uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3934 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3935 HighOp = HighOp0;
3936 }
3937
3938 // Take advantage of the fact that all GR32 operations only change the
3939 // low 32 bits by truncating Low to an i32 and inserting it directly
3940 // using a subreg. The interesting cases are those where the truncation
3941 // can be folded.
3942 SDLoc DL(Op);
3943 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3944 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3945 MVT::i64, HighOp, Low32);
3946}
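// For instance, in (or (shl X, 32), (zext i32 Y to i64)) the shifted operand
// has its low 32 bits known zero and the zero-extended operand has its high
// 32 bits known zero, so they become HighOp and LowOp respectively and the
// OR is emitted as an insertion of the truncated LowOp into the low 32-bit
// subregister of HighOp.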
3947
3948// Lower SADDO/SSUBO/UADDO/USUBO nodes.
3949SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
3950 SelectionDAG &DAG) const {
3951 SDNode *N = Op.getNode();
3952 SDValue LHS = N->getOperand(0);
3953 SDValue RHS = N->getOperand(1);
3954 SDLoc DL(N);
3955 unsigned BaseOp = 0;
3956 unsigned CCValid = 0;
3957 unsigned CCMask = 0;
3958
3959 switch (Op.getOpcode()) {
3960 default: llvm_unreachable("Unknown instruction!");
3961 case ISD::SADDO:
3962 BaseOp = SystemZISD::SADDO;
3963 CCValid = SystemZ::CCMASK_ARITH;
3964 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3965 break;
3966 case ISD::SSUBO:
3967 BaseOp = SystemZISD::SSUBO;
3968 CCValid = SystemZ::CCMASK_ARITH;
3969 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3970 break;
3971 case ISD::UADDO:
3972 BaseOp = SystemZISD::UADDO;
3973 CCValid = SystemZ::CCMASK_LOGICAL;
3974 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3975 break;
3976 case ISD::USUBO:
3977 BaseOp = SystemZISD::USUBO;
3978 CCValid = SystemZ::CCMASK_LOGICAL;
3979 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3980 break;
3981 }
3982
3983 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
3984 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
3985
3986 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3987 if (N->getValueType(1) == MVT::i1)
3988 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3989
3990 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3991}
3992
3993static bool isAddCarryChain(SDValue Carry) {
3994 while (Carry.getOpcode() == ISD::ADDCARRY)
3995 Carry = Carry.getOperand(2);
3996 return Carry.getOpcode() == ISD::UADDO;
3997}
3998
3999static bool isSubBorrowChain(SDValue Carry) {
4000 while (Carry.getOpcode() == ISD::SUBCARRY)
4001 Carry = Carry.getOperand(2);
4002 return Carry.getOpcode() == ISD::USUBO;
4003}
4004
4005// Lower ADDCARRY/SUBCARRY nodes.
4006SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
4007 SelectionDAG &DAG) const {
4008
4009 SDNode *N = Op.getNode();
4010 MVT VT = N->getSimpleValueType(0);
4011
4012 // Let legalize expand this if it isn't a legal type yet.
4013 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4014 return SDValue();
4015
4016 SDValue LHS = N->getOperand(0);
4017 SDValue RHS = N->getOperand(1);
4018 SDValue Carry = Op.getOperand(2);
4019 SDLoc DL(N);
4020 unsigned BaseOp = 0;
4021 unsigned CCValid = 0;
4022 unsigned CCMask = 0;
4023
4024 switch (Op.getOpcode()) {
4025 default: llvm_unreachable("Unknown instruction!");
4026 case ISD::ADDCARRY:
4027 if (!isAddCarryChain(Carry))
4028 return SDValue();
4029
4030 BaseOp = SystemZISD::ADDCARRY;
4031 CCValid = SystemZ::CCMASK_LOGICAL;
4032 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4033 break;
4034 case ISD::SUBCARRY:
4035 if (!isSubBorrowChain(Carry))
4036 return SDValue();
4037
4038 BaseOp = SystemZISD::SUBCARRY;
4039 CCValid = SystemZ::CCMASK_LOGICAL;
4040 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4041 break;
4042 }
4043
4044 // Set the condition code from the carry flag.
4045 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4046 DAG.getConstant(CCValid, DL, MVT::i32),
4047 DAG.getConstant(CCMask, DL, MVT::i32));
4048
4049 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4050 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4051
4052 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4053 if (N->getValueType(1) == MVT::i1)
4054 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4055
4056 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4057}
4058
4059SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4060 SelectionDAG &DAG) const {
4061 EVT VT = Op.getValueType();
4062 SDLoc DL(Op);
4063 Op = Op.getOperand(0);
4064
4065 // Handle vector types via VPOPCT.
4066 if (VT.isVector()) {
4067 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4068 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4069 switch (VT.getScalarSizeInBits()) {
4070 case 8:
4071 break;
4072 case 16: {
4073 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4074 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4075 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4076 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4077 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4078 break;
4079 }
4080 case 32: {
4081 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4082 DAG.getConstant(0, DL, MVT::i32));
4083 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4084 break;
4085 }
4086 case 64: {
4087 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4088 DAG.getConstant(0, DL, MVT::i32));
4089 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4090 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4091 break;
4092 }
4093 default:
4094 llvm_unreachable("Unexpected type");
4095 }
4096 return Op;
4097 }
4098
4099 // Get the known-zero mask for the operand.
4100 KnownBits Known = DAG.computeKnownBits(Op);
4101 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
4102 if (NumSignificantBits == 0)
4103 return DAG.getConstant(0, DL, VT);
4104
4105 // Skip known-zero high parts of the operand.
4106 int64_t OrigBitSize = VT.getSizeInBits();
4107 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
4108 BitSize = std::min(BitSize, OrigBitSize);
4109
4110 // The POPCNT instruction counts the number of bits in each byte.
4111 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
4112 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
4113 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4114
4115 // Add up per-byte counts in a binary tree. All bits of Op at
4116 // position larger than BitSize remain zero throughout.
4117 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
4118 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
4119 if (BitSize != OrigBitSize)
4120 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
4121 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
4122 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4123 }
4124
4125 // Extract overall result from high byte.
4126 if (BitSize > 8)
4127 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4128 DAG.getConstant(BitSize - 8, DL, VT));
4129
4130 return Op;
4131}
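// Worked example for an i32 operand with no known-zero high bits, e.g.
// 0x01020304: POPCNT yields per-byte counts 0x01010201, the tree then runs
// for I = 16 and I = 8:
//   I = 16: 0x01010201 + (0x01010201 << 16) = 0x03020201
//   I = 8:  0x03020201 + (0x03020201 << 8)  = 0x05040301
// and the final SRL by 32 - 8 = 24 extracts the high byte, 5, which is the
// population count of 0x01020304. A rough scalar model of the same tree
// (assuming BitSize == 32) is:
//   uint32_t sumBytes(uint32_t PerByte) { // PerByte = what POPCNT produces
//     PerByte += PerByte << 16;           // I = 16
//     PerByte += PerByte << 8;            // I = 8
//     return PerByte >> 24;               // take the high byte
//   }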
4132
4133SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4134 SelectionDAG &DAG) const {
4135 SDLoc DL(Op);
4136 AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
4137 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
4138 SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
4139 cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
4140
4141 // The only fence that needs an instruction is a sequentially-consistent
4142 // cross-thread fence.
4143 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4144 FenceSSID == SyncScope::System) {
4145 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
4146 Op.getOperand(0)),
4147 0);
4148 }
4149
4150 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4151 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4152}
4153
4154// Op is an atomic load. Lower it into a normal volatile load.
4155SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
4156 SelectionDAG &DAG) const {
4157 auto *Node = cast<AtomicSDNode>(Op.getNode());
4158 return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
4159 Node->getChain(), Node->getBasePtr(),
4160 Node->getMemoryVT(), Node->getMemOperand());
4161}
4162
4163// Op is an atomic store. Lower it into a normal volatile store.
4164SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
4165 SelectionDAG &DAG) const {
4166 auto *Node = cast<AtomicSDNode>(Op.getNode());
4167 SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
4168 Node->getBasePtr(), Node->getMemoryVT(),
4169 Node->getMemOperand());
4170 // We have to enforce sequential consistency by performing a
4171 // serialization operation after the store.
4172 if (Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)
4173 Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
4174 MVT::Other, Chain), 0);
4175 return Chain;
4176}
4177
4178// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
4179// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
4180SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
4181 SelectionDAG &DAG,
4182 unsigned Opcode) const {
4183 auto *Node = cast<AtomicSDNode>(Op.getNode());
4184
4185 // 32-bit operations need no code outside the main loop.
4186 EVT NarrowVT = Node->getMemoryVT();
4187 EVT WideVT = MVT::i32;
4188 if (NarrowVT == WideVT)
4189 return Op;
4190
4191 int64_t BitSize = NarrowVT.getSizeInBits();
4192 SDValue ChainIn = Node->getChain();
4193 SDValue Addr = Node->getBasePtr();
4194 SDValue Src2 = Node->getVal();
4195 MachineMemOperand *MMO = Node->getMemOperand();
4196 SDLoc DL(Node);
4197 EVT PtrVT = Addr.getValueType();
4198
4199 // Convert atomic subtracts of constants into additions.
4200 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4201 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4202 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
4203 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
4204 }
4205
4206 // Get the address of the containing word.
4207 SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4208 DAG.getConstant(-4, DL, PtrVT));
4209
4210 // Get the number of bits that the word must be rotated left in order
4211 // to bring the field to the top bits of a GR32.
4212 SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4213 DAG.getConstant(3, DL, PtrVT));
4214 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4215
4216 // Get the complementing shift amount, for rotating a field in the top
4217 // bits back to its proper position.
4218 SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4219 DAG.getConstant(0, DL, WideVT), BitShift);
4220
4221 // Extend the source operand to 32 bits and prepare it for the inner loop.
4222 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
4223 // operations require the source to be shifted in advance. (This shift
4224 // can be folded if the source is constant.) For AND and NAND, the lower
4225 // bits must be set, while for other opcodes they should be left clear.
4226 if (Opcode != SystemZISD::ATOMIC_SWAPW)
4227 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
4228 DAG.getConstant(32 - BitSize, DL, WideVT));
4229 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
4230 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
4231 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
4232 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
4233
4234 // Construct the ATOMIC_LOADW_* node.
4235 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
4236 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4237 DAG.getConstant(BitSize, DL, WideVT) };
4238 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4239 NarrowVT, MMO);
4240
4241 // Rotate the result of the final CS so that the field is in the lower
4242 // bits of a GR32, then truncate it.
4243 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
4244 DAG.getConstant(BitSize, DL, WideVT));
4245 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
4246
4247 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
4248 return DAG.getMergeValues(RetOps, DL);
4249}
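// As a concrete example, a 16-bit atomic operation on an address of the form
// 4*N + 2: AlignedAddr is 4*N, BitShift is addr * 8, which modulo 32 is 16,
// so rotating the containing (big-endian) word left by 16 bits moves bytes
// 2 and 3 into the top halfword of the GR32, and Src2 is pre-shifted left by
// 32 - 16 = 16 bits to line up with that field.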
4250
4251// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations
4252// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
4253// operations into additions.
4254SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
4255 SelectionDAG &DAG) const {
4256 auto *Node = cast<AtomicSDNode>(Op.getNode());
4257 EVT MemVT = Node->getMemoryVT();
4258 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
4259 // A full-width operation.
4260 assert(Op.getValueType() == MemVT && "Mismatched VTs");
4261 SDValue Src2 = Node->getVal();
4262 SDValue NegSrc2;
4263 SDLoc DL(Src2);
4264
4265 if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
4266 // Use an addition if the operand is constant and either LAA(G) is
4267 // available or the negative value is in the range of A(G)FHI.
4268 int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
4269 if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
4270 NegSrc2 = DAG.getConstant(Value, DL, MemVT);
4271 } else if (Subtarget.hasInterlockedAccess1())
4272 // Use LAA(G) if available.
4273 NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
4274 Src2);
4275
4276 if (NegSrc2.getNode())
4277 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
4278 Node->getChain(), Node->getBasePtr(), NegSrc2,
4279 Node->getMemOperand());
4280
4281 // Use the node as-is.
4282 return Op;
4283 }
4284
4285 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
4286}
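// For example, an i64 atomic subtraction of the constant 1 becomes an atomic
// addition of -1, since -1 fits the 32-bit immediate range; with the
// interlocked-access facility 1 a non-constant operand is likewise negated
// so that LAAG can be used.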
4287
4288// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
4289SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
4290 SelectionDAG &DAG) const {
4291 auto *Node = cast<AtomicSDNode>(Op.getNode());
4292 SDValue ChainIn = Node->getOperand(0);
4293 SDValue Addr = Node->getOperand(1);
4294 SDValue CmpVal = Node->getOperand(2);
4295 SDValue SwapVal = Node->getOperand(3);
4296 MachineMemOperand *MMO = Node->getMemOperand();
4297 SDLoc DL(Node);
4298
4299 // We have native support for 32-bit and 64-bit compare and swap, but we
4300 // still need to expand extracting the "success" result from the CC.
4301 EVT NarrowVT = Node->getMemoryVT();
4302 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4303 if (NarrowVT == WideVT) {
4304 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4305 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
4306 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
4307 DL, Tys, Ops, NarrowVT, MMO);
4308 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4309 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
4310
4311 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4312 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4313 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4314 return SDValue();
4315 }
4316
4317 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4318 // via a fullword ATOMIC_CMP_SWAPW operation.
4319 int64_t BitSize = NarrowVT.getSizeInBits();
4320 EVT PtrVT = Addr.getValueType();
4321
4322 // Get the address of the containing word.
4323 SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4324 DAG.getConstant(-4, DL, PtrVT));
4325
4326 // Get the number of bits that the word must be rotated left in order
4327 // to bring the field to the top bits of a GR32.
4328 SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4329 DAG.getConstant(3, DL, PtrVT));
4330 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4331
4332 // Get the complementing shift amount, for rotating a field in the top
4333 // bits back to its proper position.
4334 SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4335 DAG.getConstant(0, DL, WideVT), BitShift);
4336
4337 // Construct the ATOMIC_CMP_SWAPW node.
4338 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4339 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
4340 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
4341 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
4342 VTList, Ops, NarrowVT, MMO);
4343 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4344 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
4345
4346 // emitAtomicCmpSwapW() will zero extend the result (original value).
4347 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
4348 DAG.getValueType(NarrowVT));
4349 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
4350 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4351 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4352 return SDValue();
4353}
4354
4355MachineMemOperand::Flags
4356SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
4357 // Because of how we convert atomic_load and atomic_store to normal loads and
4358 // stores in the DAG, we need to ensure that the MMOs are marked volatile,
4359 // since DAGCombine hasn't been updated to account for loads that are
4360 // atomic but non-volatile. (See D57601)
4361 if (auto *SI = dyn_cast<StoreInst>(&I))
4362 if (SI->isAtomic())
4363 return MachineMemOperand::MOVolatile;
4364 if (auto *LI = dyn_cast<LoadInst>(&I))
4365 if (LI->isAtomic())
4366 return MachineMemOperand::MOVolatile;
4367 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
4368 if (AI->isAtomic())
4369 return MachineMemOperand::MOVolatile;
4370 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
4371 if (AI->isAtomic())
4372 return MachineMemOperand::MOVolatile;
4373 return MachineMemOperand::MONone;
4374}
4375
4376SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
4377 SelectionDAG &DAG) const {
4378 MachineFunction &MF = DAG.getMachineFunction();
4379 const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
4380 auto *Regs = Subtarget->getSpecialRegisters();
4381 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4382 report_fatal_error("Variable-sized stack allocations are not supported "
4383 "in GHC calling convention");
4384 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
4385 Regs->getStackPointerRegister(), Op.getValueType());
4386}
4387
4388SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
4389 SelectionDAG &DAG) const {
4390 MachineFunction &MF = DAG.getMachineFunction();
4391 const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
4392 auto *Regs = Subtarget->getSpecialRegisters();
4393 bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
4394
4395 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4396 report_fatal_error("Variable-sized stack allocations are not supported "
4397 "in GHC calling convention");
4398
4399 SDValue Chain = Op.getOperand(0);
4400 SDValue NewSP = Op.getOperand(1);
4401 SDValue Backchain;
4402 SDLoc DL(Op);
4403
4404 if (StoreBackchain) {
4405 SDValue OldSP = DAG.getCopyFromReg(
4406 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
4407 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4408 MachinePointerInfo());
4409 }
4410
4411 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
4412
4413 if (StoreBackchain)
4414 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4415 MachinePointerInfo());
4416
4417 return Chain;
4418}
4419
4420SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
4421 SelectionDAG &DAG) const {
4422 bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
4423 if (!IsData)
4424 // Just preserve the chain.
4425 return Op.getOperand(0);
4426
4427 SDLoc DL(Op);
4428 bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
4429 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
4430 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
4431 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
4432 Op.getOperand(1)};
4433 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
4434 Node->getVTList(), Ops,
4435 Node->getMemoryVT(), Node->getMemOperand());
4436}
4437
4438// Convert condition code in CCReg to an i32 value.
4439static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
4440 SDLoc DL(CCReg);
4441 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
4442 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
4443 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
4444}
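// IPM deposits the condition code at bit position SystemZ::IPM_CC (28) of
// the i32 result, so the logical shift right by that amount leaves the raw
// CC value 0-3 in the low bits.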
4445
4446SDValue
4447SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4448 SelectionDAG &DAG) const {
4449 unsigned Opcode, CCValid;
4450 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
4451 assert(Op->getNumValues() == 2 && "Expected only CC result and chain")(static_cast <bool> (Op->getNumValues() == 2 &&
"Expected only CC result and chain") ? void (0) : __assert_fail
("Op->getNumValues() == 2 && \"Expected only CC result and chain\""
, "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp", 4451, __extension__
__PRETTY_FUNCTION__))
;
4452 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
4453 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
4454 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
4455 return SDValue();
4456 }
4457
4458 return SDValue();
4459}
4460
4461SDValue
4462SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4463 SelectionDAG &DAG) const {
4464 unsigned Opcode, CCValid;
4465 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
4466 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
4467 if (Op->getNumValues() == 1)
4468 return getCCResult(DAG, SDValue(Node, 0));
4469 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result")(static_cast <bool> (Op->getNumValues() == 2 &&
"Expected a CC and non-CC result") ? void (0) : __assert_fail
("Op->getNumValues() == 2 && \"Expected a CC and non-CC result\""
, "llvm/lib/Target/SystemZ/SystemZISelLowering.cpp", 4469, __extension__
__PRETTY_FUNCTION__))
;
4470 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
4471 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
4472 }
4473
4474 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4475 switch (Id) {
4476 case Intrinsic::thread_pointer:
4477 return lowerThreadPointer(SDLoc(Op), DAG);
4478
4479 case Intrinsic::s390_vpdi:
4480 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
4481 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4482
4483 case Intrinsic::s390_vperm:
4484 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
4485 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4486
4487 case Intrinsic::s390_vuphb:
4488 case Intrinsic::s390_vuphh:
4489 case Intrinsic::s390_vuphf:
4490 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
4491 Op.getOperand(1));
4492
4493 case Intrinsic::s390_vuplhb:
4494 case Intrinsic::s390_vuplhh:
4495 case Intrinsic::s390_vuplhf:
4496 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
4497 Op.getOperand(1));
4498
4499 case Intrinsic::s390_vuplb:
4500 case Intrinsic::s390_vuplhw:
4501 case Intrinsic::s390_vuplf:
4502 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
4503 Op.getOperand(1));
4504
4505 case Intrinsic::s390_vupllb:
4506 case Intrinsic::s390_vupllh:
4507 case Intrinsic::s390_vupllf:
4508 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
4509 Op.getOperand(1));
4510
4511 case Intrinsic::s390_vsumb:
4512 case Intrinsic::s390_vsumh:
4513 case Intrinsic::s390_vsumgh:
4514 case Intrinsic::s390_vsumgf:
4515 case Intrinsic::s390_vsumqf:
4516 case Intrinsic::s390_vsumqg:
4517 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
4518 Op.getOperand(1), Op.getOperand(2));
4519 }
4520
4521 return SDValue();
4522}
4523
4524namespace {
4525// Says that SystemZISD operation Opcode can be used to perform the equivalent
4526// of a VPERM with permute vector Bytes. If Opcode takes three operands,
4527// Operand is the constant third operand, otherwise it is the number of
4528// bytes in each element of the result.
4529struct Permute {
4530 unsigned Opcode;
4531 unsigned Operand;
4532 unsigned char Bytes[SystemZ::VectorBytes];
4533};
4534}
4535
4536static const Permute PermuteForms[] = {
4537 // VMRHG
4538 { SystemZISD::MERGE_HIGH, 8,
4539 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4540 // VMRHF
4541 { SystemZISD::MERGE_HIGH, 4,
4542 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4543 // VMRHH
4544 { SystemZISD::MERGE_HIGH, 2,
4545 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4546 // VMRHB
4547 { SystemZISD::MERGE_HIGH, 1,
4548 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4549 // VMRLG
4550 { SystemZISD::MERGE_LOW, 8,
4551 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4552 // VMRLF
4553 { SystemZISD::MERGE_LOW, 4,
4554 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4555 // VMRLH
4556 { SystemZISD::MERGE_LOW, 2,
4557 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4558 // VMRLB
4559 { SystemZISD::MERGE_LOW, 1,
4560 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4561 // VPKG
4562 { SystemZISD::PACK, 4,
4563 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4564 // VPKF
4565 { SystemZISD::PACK, 2,
4566 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4567 // VPKH
4568 { SystemZISD::PACK, 1,
4569 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4570 // VPDI V1, V2, 4 (low half of V1, high half of V2)
4571 { SystemZISD::PERMUTE_DWORDS, 4,
4572 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4573 // VPDI V1, V2, 1 (high half of V1, low half of V2)
4574 { SystemZISD::PERMUTE_DWORDS, 1,
4575 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4576};
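// Reading an entry: selectors 0-15 refer to bytes of operand 0 and 16-31 to
// bytes of operand 1, as in VPERM. The VMRHB form, for example, interleaves
// the first eight bytes of both inputs, so result byte 0 is operand 0 byte 0,
// result byte 1 is operand 1 byte 0 (selector 16), and so on.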
4577
4578// Called after matching a vector shuffle against a particular pattern.
4579// Both the original shuffle and the pattern have two vector operands.
4580// OpNos[0] is the operand of the original shuffle that should be used for
4581// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4582// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
4583// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4584// for operands 0 and 1 of the pattern.
4585static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
4586 if (OpNos[0] < 0) {
4587 if (OpNos[1] < 0)
4588 return false;
4589 OpNo0 = OpNo1 = OpNos[1];
4590 } else if (OpNos[1] < 0) {
4591 OpNo0 = OpNo1 = OpNos[0];
4592 } else {
4593 OpNo0 = OpNos[0];
4594 OpNo1 = OpNos[1];
4595 }
4596 return true;
4597}
4598
4599// Bytes is a VPERM-like permute vector, except that -1 is used for
4600// undefined bytes. Return true if the VPERM can be implemented using P.
4601// When returning true set OpNo0 to the VPERM operand that should be
4602// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
4603//
4604// For example, if swapping the VPERM operands allows P to match, OpNo0
4605// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
4606// operand, but rewriting it to use two duplicated operands allows it to
4607// match P, then OpNo0 and OpNo1 will be the same.
4608static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
4609 unsigned &OpNo0, unsigned &OpNo1) {
4610 int OpNos[] = { -1, -1 };
4611 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4612 int Elt = Bytes[I];
4613 if (Elt >= 0) {
4614 // Make sure that the two permute vectors use the same suboperand
4615 // byte number. Only the operand numbers (the high bits) are
4616 // allowed to differ.
4617 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
4618 return false;
4619 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
4620 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
4621 // Make sure that the operand mappings are consistent with previous
4622 // elements.
4623 if (OpNos[ModelOpNo] == 1 - RealOpNo)
4624 return false;
4625 OpNos[ModelOpNo] = RealOpNo;
4626 }
4627 }
4628 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4629}
4630
4631// As above, but search for a matching permute.
4632static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
4633 unsigned &OpNo0, unsigned &OpNo1) {
4634 for (auto &P : PermuteForms)
4635 if (matchPermute(Bytes, P, OpNo0, OpNo1))
4636 return &P;
4637 return nullptr;
4638}
4639
4640// Bytes is a VPERM-like permute vector, except that -1 is used for
4641// undefined bytes. This permute is an operand of an outer permute.
4642// See whether redistributing the -1 bytes gives a shuffle that can be
4643// implemented using P. If so, set Transform to a VPERM-like permute vector
4644// that, when applied to the result of P, gives the original permute in Bytes.
4645static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4646 const Permute &P,
4647 SmallVectorImpl<int> &Transform) {
4648 unsigned To = 0;
4649 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
4650 int Elt = Bytes[From];
4651 if (Elt < 0)
4652 // Byte number From of the result is undefined.
4653 Transform[From] = -1;
4654 else {
4655 while (P.Bytes[To] != Elt) {
4656 To += 1;
4657 if (To == SystemZ::VectorBytes)
4658 return false;
4659 }
4660 Transform[From] = To;
4661 }
4662 }
4663 return true;
4664}
4665
4666// As above, but search for a matching permute.
4667static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4668 SmallVectorImpl<int> &Transform) {
4669 for (auto &P : PermuteForms)
4670 if (matchDoublePermute(Bytes, P, Transform))
4671 return &P;
4672 return nullptr;
4673}
4674
4675// Convert the mask of the given shuffle op into a byte-level mask,
4676// as if it had type vNi8.
4677static bool getVPermMask(SDValue ShuffleOp,
4678 SmallVectorImpl<int> &Bytes) {
4679 EVT VT = ShuffleOp.getValueType();
4680 unsigned NumElements = VT.getVectorNumElements();
4681 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4682
4683 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
4684 Bytes.resize(NumElements * BytesPerElement, -1);
4685 for (unsigned I = 0; I < NumElements; ++I) {
4686 int Index = VSN->getMaskElt(I);
4687 if (Index >= 0)
4688 for (unsigned J = 0; J < BytesPerElement; ++J)
4689 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4690 }
4691 return true;
4692 }
4693 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
4694 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
4695 unsigned Index = ShuffleOp.getConstantOperandVal(1);
4696 Bytes.resize(NumElements * BytesPerElement, -1);
4697 for (unsigned I = 0; I < NumElements; ++I)
4698 for (unsigned J = 0; J < BytesPerElement; ++J)
4699 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4700 return true;
4701 }
4702 return false;
4703}
4704
4705// Bytes is a VPERM-like permute vector, except that -1 is used for
4706// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
4707// the result come from a contiguous sequence of bytes from one input.
4708// Set Base to the selector for the first byte if so.
4709static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
4710 unsigned BytesPerElement, int &Base) {
4711 Base = -1;
4712 for (unsigned I = 0; I < BytesPerElement; ++I) {
4713 if (Bytes[Start + I] >= 0) {
4714 unsigned Elem = Bytes[Start + I];
4715 if (Base < 0) {
4716 Base = Elem - I;
4717 // Make sure the bytes would come from one input operand.
4718 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
4719 return false;
4720 } else if (unsigned(Base) != Elem - I)
4721 return false;
4722 }
4723 }
4724 return true;
4725}
4726
4727// Bytes is a VPERM-like permute vector, except that -1 is used for
4728// undefined bytes. Return true if it can be performed using VSLDB.
4729// When returning true, set StartIndex to the shift amount and OpNo0
4730// and OpNo1 to the VPERM operands that should be used as the first
4731// and second shift operand respectively.
4732static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
4733 unsigned &StartIndex, unsigned &OpNo0,
4734 unsigned &OpNo1) {
4735 int OpNos[] = { -1, -1 };
4736 int Shift = -1;
4737 for (unsigned I = 0; I < 16; ++I) {
4738 int Index = Bytes[I];
4739 if (Index >= 0) {
4740 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
4741 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
4742 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
4743 if (Shift < 0)
4744 Shift = ExpectedShift;
4745 else if (Shift != ExpectedShift)
4746 return false;
4747 // Make sure that the operand mappings are consistent with previous
4748 // elements.
4749 if (OpNos[ModelOpNo] == 1 - RealOpNo)
4750 return false;
4751 OpNos[ModelOpNo] = RealOpNo;
4752 }
4753 }
4754 StartIndex = Shift;
4755 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4756}
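// For example, Bytes = { 4, 5, ..., 15, 16, 17, 18, 19 } shifts every
// defined byte by the same amount, 4, so it matches a VSLDB with
// StartIndex 4 that produces the last 12 bytes of the first operand followed
// by the first 4 bytes of the second.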
4757
4758// Create a node that performs P on operands Op0 and Op1, casting the
4759// operands to the appropriate type. The type of the result is determined by P.
4760static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4761 const Permute &P, SDValue Op0, SDValue Op1) {
4762 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
4763 // elements of a PACK are twice as wide as the outputs.
4764 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
4765 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
4766 P.Operand);
4767 // Cast both operands to the appropriate type.
4768 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
4769 SystemZ::VectorBytes / InBytes);
4770 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
4771 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
4772 SDValue Op;
4773 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
4774 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
4775 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
4776 } else if (P.Opcode == SystemZISD::PACK) {
4777 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
4778 SystemZ::VectorBytes / P.Operand);
4779 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
4780 } else {
4781 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
4782 }
4783 return Op;
4784}
4785
4786static bool isZeroVector(SDValue N) {
4787 if (N->getOpcode() == ISD::BITCAST)
4788 N = N->getOperand(0);
4789 if (N->getOpcode() == ISD::SPLAT_VECTOR)
4790 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
4791 return Op->getZExtValue() == 0;
4792 return ISD::isBuildVectorAllZeros(N.getNode());
4793}
4794
4795// Return the index of the zero/undef vector, or UINT32_MAX if not found.
4796static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
4797 for (unsigned I = 0; I < Num ; I++)
4798 if (isZeroVector(Ops[I]))
4799 return I;
4800 return UINT32_MAX;
4801}
4802
4803// Bytes is a VPERM-like permute vector, except that -1 is used for
4804// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
4805// VSLDB or VPERM.
4806static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4807 SDValue *Ops,
4808 const SmallVectorImpl<int> &Bytes) {
4809 for (unsigned I = 0; I < 2; ++I)
4810 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
4811
4812 // First see whether VSLDB can be used.
4813 unsigned StartIndex, OpNo0, OpNo1;
4814 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
4815 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
4816 Ops[OpNo1],
4817 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
4818
4819 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
4820 // eliminate a zero vector by reusing any zero index in the permute vector.
4821 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
4822 if (ZeroVecIdx != UINT32_MAX) {
4823 bool MaskFirst = true;
4824 int ZeroIdx = -1;
4825 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4826 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
4827 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
4828 if (OpNo == ZeroVecIdx && I == 0) {
4829 // If the first byte is zero, use mask as first operand.
4830 ZeroIdx = 0;
4831 break;
4832 }
4833 if (OpNo != ZeroVecIdx && Byte == 0) {
4834 // If mask contains a zero, use it by placing that vector first.
4835 ZeroIdx = I + SystemZ::VectorBytes;
4836 MaskFirst = false;
4837 break;
4838 }
4839 }
4840 if (ZeroIdx != -1) {
4841 SDValue IndexNodes[SystemZ::VectorBytes];
4842 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4843 if (Bytes[I] >= 0) {
4844 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
4845 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
4846 if (OpNo == ZeroVecIdx)
4847 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
4848 else {
4849 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
4850 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
4851 }
4852 } else
4853 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
4854 }
4855 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
4856 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
4857 if (MaskFirst)
4858 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
4859 Mask);
4860 else
4861 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
4862 Mask);
4863 }
4864 }
4865
4866 SDValue IndexNodes[SystemZ::VectorBytes];
4867 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4868 if (Bytes[I] >= 0)
4869 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
4870 else
4871 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
4872 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
4873 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
4874 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
4875}
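// In the VPERM fallback, index values 0-15 select bytes of Ops[0] and 16-31
// select bytes of Ops[1], with undefined result bytes becoming undef lanes
// in the index vector. When one input is entirely zero, the mask vector is
// reused as a VPERM source so that a zero byte of the mask itself supplies
// the zero lanes and the separate zero register can be dropped.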
4876
4877namespace {
4878// Describes a general N-operand vector shuffle.
4879struct GeneralShuffle {
4880 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
4881 void addUndef();
4882 bool add(SDValue, unsigned);
4883 SDValue getNode(SelectionDAG &, const SDLoc &);
4884 void tryPrepareForUnpack();
4885 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
4886 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
4887
4888 // The operands of the shuffle.
4889 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
4890
4891 // Index I is -1 if byte I of the result is undefined. Otherwise the
4892 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
4893 // Bytes[I] / SystemZ::VectorBytes.
4894 SmallVector<int, SystemZ::VectorBytes> Bytes;
4895
4896 // The type of the shuffle result.
4897 EVT VT;
4898
4899 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
4900 unsigned UnpackFromEltSize;
4901};
4902}
4903
4904// Add an extra undefined element to the shuffle.
4905void GeneralShuffle::addUndef() {
4906 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4907 for (unsigned I = 0; I < BytesPerElement; ++I)
4908 Bytes.push_back(-1);
4909}
4910
4911// Add an extra element to the shuffle, taking it from element Elem of Op.
4912// A null Op indicates a vector input whose value will be calculated later;
4913// there is at most one such input per shuffle and it always has the same
4914// type as the result. Aborts and returns false if the source vector elements
4915// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
4916// LLVM they become implicitly extended, but this is rare and not optimized.
4917bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
4918 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4919
4920 // The source vector can have wider elements than the result,
4921 // either through an explicit TRUNCATE or because of type legalization.
4922 // We want the least significant part.
4923 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
4924 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
4925
4926 // Return false if the source elements are smaller than their destination
4927 // elements.
4928 if (FromBytesPerElement < BytesPerElement)
4929 return false;
4930
4931 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
4932 (FromBytesPerElement - BytesPerElement));
4933
4934 // Look through things like shuffles and bitcasts.
4935 while (Op.getNode()) {
4936 if (Op.getOpcode() == ISD::BITCAST)
4937 Op = Op.getOperand(0);
4938 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
4939 // See whether the bytes we need come from a contiguous part of one
4940 // operand.
4941 SmallVector<int, SystemZ::VectorBytes> OpBytes;
4942 if (!getVPermMask(Op, OpBytes))
4943 break;
4944 int NewByte;
4945 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
4946 break;
4947 if (NewByte < 0) {
4948 addUndef();
4949 return true;
4950 }
4951 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
4952 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
4953 } else if (Op.isUndef()) {
4954 addUndef();
4955 return true;
4956 } else
4957 break;
4958 }
4959
4960 // Make sure that the source of the extraction is in Ops.
4961 unsigned OpNo = 0;
4962 for (; OpNo < Ops.size(); ++OpNo)
4963 if (Ops[OpNo] == Op)
4964 break;
4965 if (OpNo == Ops.size())
4966 Ops.push_back(Op);
4967
4968 // Add the element to Bytes.
4969 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
4970 for (unsigned I = 0; I < BytesPerElement; ++I)
4971 Bytes.push_back(Base + I);
4972
4973 return true;
4974}
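// Example of the Byte computation above: adding element 1 of a v4i32 source
// to a shuffle with a v8i16 result gives FromBytesPerElement = 4 and
// BytesPerElement = 2, so Byte = (1 * 4) % 16 + (4 - 2) = 6, i.e. the two
// least-significant (rightmost, big-endian) bytes of that i32 element.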
4975
4976// Return SDNodes for the completed shuffle.
4977SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
4978 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
4979
4980 if (Ops.size() == 0)
4981 return DAG.getUNDEF(VT);
4982
4983 // Use a single unpack if possible as the last operation.
4984 tryPrepareForUnpack();
4985
4986 // Make sure that there are at least two shuffle operands.
4987 if (Ops.size() == 1)
4988 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
4989
4990 // Create a tree of shuffles, deferring root node until after the loop.
4991 // Try to redistribute the undefined elements of non-root nodes so that
4992 // the non-root shuffles match something like a pack or merge, then adjust
4993 // the parent node's permute vector to compensate for the new order.
4994 // Among other things, this copes with vectors like <2 x i16> that were
4995 // padded with undefined elements during type legalization.
4996 //
4997 // In the best case this redistribution will lead to the whole tree
4998 // using packs and merges. It should rarely be a loss in other cases.
4999 unsigned Stride = 1;
5000 for (; Stride * 2 < Ops.size(); Stride *= 2) {
5001 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
5002 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
5003
5004 // Create a mask for just these two operands.
5005 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
5006 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5007 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
5008 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
5009 if (OpNo == I)
5010 NewBytes[J] = Byte;
5011 else if (OpNo == I + Stride)
5012 NewBytes[J] = SystemZ::VectorBytes + Byte;
5013 else
5014 NewBytes[J] = -1;
5015 }
5016 // See if it would be better to reorganize NewMask to avoid using VPERM.
5017 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
5018 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
5019 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
5020 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
5021 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5022 if (NewBytes[J] >= 0) {
5023 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
5024 "Invalid double permute");
5025 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
5026 } else
5027 assert(NewBytesMap[J] < 0 && "Invalid double permute");
5028 }
5029 } else {
5030 // Just use NewBytes on the operands.
5031 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
5032 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
5033 if (NewBytes[J] >= 0)
5034 Bytes[J] = I * SystemZ::VectorBytes + J;
5035 }
5036 }
5037 }
5038
5039 // Now we just have 2 inputs. Put the second operand in Ops[1].
5040 if (Stride > 1) {
5041 Ops[1] = Ops[Stride];
5042 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5043 if (Bytes[I] >= int(SystemZ::VectorBytes))
5044 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
5045 }
5046
5047 // Look for an instruction that can do the permute without resorting
5048 // to VPERM.
5049 unsigned OpNo0, OpNo1;
5050 SDValue Op;
5051 if (unpackWasPrepared() && Ops[1].isUndef())
5052 Op = Ops[0];
5053 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
5054 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
5055 else
5056 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
5057
5058 Op = insertUnpackIfPrepared(DAG, DL, Op);
5059
5060 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5061}
5062
5063#ifndef NDEBUG
5064static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
5065 dbgs() << Msg.c_str() << " { ";
5066 for (unsigned i = 0; i < Bytes.size(); i++)
5067 dbgs() << Bytes[i] << " ";
5068 dbgs() << "}\n";
5069}
5070#endif
5071
5072// If the Bytes vector matches an unpack operation, prepare to do the unpack
5073// after all else by removing the zero vector and the effect of the unpack on
5074// Bytes.
5075void GeneralShuffle::tryPrepareForUnpack() {
5076 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
5077 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
5078 return;
5079
5080 // Only do this if removing the zero vector reduces the depth, otherwise
5081 // the critical path will increase with the final unpack.
5082 if (Ops.size() > 2 &&
5083 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
5084 return;
5085
5086 // Find an unpack that would allow removing the zero vector from Ops.
5087 UnpackFromEltSize = 1;
5088 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
5089 bool MatchUnpack = true;
5090 SmallVector<int, SystemZ::VectorBytes> SrcBytes;
5091 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
5092 unsigned ToEltSize = UnpackFromEltSize * 2;
5093 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
5094 if (!IsZextByte)
5095 SrcBytes.push_back(Bytes[Elt]);
5096 if (Bytes[Elt] != -1) {
5097 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
5098 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
5099 MatchUnpack = false;
5100 break;
5101 }
5102 }
5103 }
5104 if (MatchUnpack) {
5105 if (Ops.size() == 2) {
5106 // Don't use unpack if a single source operand needs rearrangement.
5107 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
5108 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
5109 UnpackFromEltSize = UINT_MAX;
5110 return;
5111 }
5112 }
5113 break;
5114 }
5115 }
5116 if (UnpackFromEltSize > 4)
5117 return;
5118
5119 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
5120 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
5121 << ".\n";
5122 dumpBytes(Bytes, "Original Bytes vector:"););
5123
5124 // Apply the unpack in reverse to the Bytes array.
5125 unsigned B = 0;
5126 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
5127 Elt += UnpackFromEltSize;
5128 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
5129 Bytes[B] = Bytes[Elt];
5130 }
5131 while (B < SystemZ::VectorBytes)
5132 Bytes[B++] = -1;
5133
5134 // Remove the zero vector from Ops
5135 Ops.erase(&Ops[ZeroVecOpNo]);
5136 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5137 if (Bytes[I] >= 0) {
5138 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5139 if (OpNo > ZeroVecOpNo)
5140 Bytes[I] -= SystemZ::VectorBytes;
5141 }
5142
5143 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
5144 dbgs() << "\n";);
5145}
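// Rough example: if Bytes describes a zero extension of i16 elements from
// one operand into i32 lanes (the high halfword of every lane coming from
// the zero vector), the zero vector is dropped, the surviving halfword
// selectors are compacted into the first half of Bytes, and a final logical
// unpack-high with UnpackFromEltSize == 2 recreates the interleaved zeros.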
5146
5147SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
5148 const SDLoc &DL,
5149 SDValue Op) {
5150 if (!unpackWasPrepared())
5151 return Op;
5152 unsigned InBits = UnpackFromEltSize * 8;
5153 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
5154 SystemZ::VectorBits / InBits);
5155 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
5156 unsigned OutBits = InBits * 2;
5157 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
5158 SystemZ::VectorBits / OutBits);
5159 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
5160}
5161
5162// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
5163static bool isScalarToVector(SDValue Op) {
5164 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
5165 if (!Op.getOperand(I).isUndef())
5166 return false;
5167 return true;
5168}
5169
5170// Return a vector of type VT that contains Value in the first element.
5171// The other elements don't matter.
5172static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5173 SDValue Value) {
5174 // If we have a constant, replicate it to all elements and let the
5175 // BUILD_VECTOR lowering take care of it.
5176 if (Value.getOpcode() == ISD::Constant ||
5177 Value.getOpcode() == ISD::ConstantFP) {
5178 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
5179 return DAG.getBuildVector(VT, DL, Ops);
5180 }
5181 if (Value.isUndef())
5182 return DAG.getUNDEF(VT);
5183 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
5184}
5185
5186// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
5187// element 1. Used for cases in which replication is cheap.
5188static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5189 SDValue Op0, SDValue Op1) {
5190 if (Op0.isUndef()) {
5191 if (Op1.isUndef())
5192 return DAG.getUNDEF(VT);
5193 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
5194 }
5195 if (Op1.isUndef())
5196 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
5197 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
5198 buildScalarToVector(DAG, DL, VT, Op0),
5199 buildScalarToVector(DAG, DL, VT, Op1));
5200}
5201
5202// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
5203// vector for them.
5204static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
5205 SDValue Op1) {
5206 if (Op0.isUndef() && Op1.isUndef())
5207 return DAG.getUNDEF(MVT::v2i64);
5208 // If one of the two inputs is undefined then replicate the other one,
5209 // in order to avoid using another register unnecessarily.
5210 if (Op0.isUndef())
5211 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5212 else if (Op1.isUndef())
5213 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5214 else {
5215 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5216 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5217 }
5218 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
5219}
5220
5221// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
5222// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
5223// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
5224// would benefit from this representation and return it if so.
5225static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
5226 BuildVectorSDNode *BVN) {
5227 EVT VT = BVN->getValueType(0);
5228 unsigned NumElements = VT.getVectorNumElements();
5229
5230 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
5231 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
5232 // need a BUILD_VECTOR, add an additional placeholder operand for that
5233 // BUILD_VECTOR and store its operands in ResidueOps.
5234 GeneralShuffle GS(VT);
5235 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
5236 bool FoundOne = false;
5237 for (unsigned I = 0; I < NumElements; ++I) {
5238 SDValue Op = BVN->getOperand(I);
5239 if (Op.getOpcode() == ISD::TRUNCATE)
5240 Op = Op.getOperand(0);
5241 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5242 Op.getOperand(1).getOpcode() == ISD::Constant) {
5243 unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
5244 if (!GS.add(Op.getOperand(0), Elem))
5245 return SDValue();
5246 FoundOne = true;
5247 } else if (Op.isUndef()) {
5248 GS.addUndef();
5249 } else {
5250 if (!GS.add(SDValue(), ResidueOps.size()))
5251 return SDValue();
5252 ResidueOps.push_back(BVN->getOperand(I));
5253 }
5254 }
5255
5256 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
5257 if (!FoundOne)
5258 return SDValue();
5259
5260 // Create the BUILD_VECTOR for the remaining elements, if any.
5261 if (!ResidueOps.empty()) {
5262 while (ResidueOps.size() < NumElements)
5263 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
5264 for (auto &Op : GS.Ops) {
5265 if (!Op.getNode()) {
5266 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
5267 break;
5268 }
5269 }
5270 }
5271 return GS.getNode(DAG, SDLoc(BVN));
5272}
5273
5274bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
5275 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
5276 return true;
5277 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
5278 return true;
5279 return false;
5280}
5281
5282// Combine GPR scalar values Elems into a vector of type VT.
5283SDValue
5284SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5285 SmallVectorImpl<SDValue> &Elems) const {
5286 // See whether there is a single replicated value.
5287 SDValue Single;
5288 unsigned int NumElements = Elems.size();
5289 unsigned int Count = 0;
5290 for (auto Elem : Elems) {
5291 if (!Elem.isUndef()) {
5292 if (!Single.getNode())
5293 Single = Elem;
5294 else if (Elem != Single) {
5295 Single = SDValue();
5296 break;
5297 }
5298 Count += 1;
5299 }
5300 }
5301 // There are three cases here:
5302 //
5303 // - if the only defined element is a loaded one, the best sequence
5304 // is a replicating load.
5305 //
5306 // - otherwise, if the only defined element is an i64 value, we will
5307 // end up with the same VLVGP sequence regardless of whether we short-cut
5308 // for replication or fall through to the later code.
5309 //
5310 // - otherwise, if the only defined element is an i32 or smaller value,
5311 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
5312 // This is only a win if the single defined element is used more than once.
5313 // In other cases we're better off using a single VLVGx.
5314 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
5315 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
5316
5317 // If all elements are loads, use VLREP/VLEs (below).
5318 bool AllLoads = true;
5319 for (auto Elem : Elems)
5320 if (!isVectorElementLoad(Elem)) {
5321 AllLoads = false;
5322 break;
5323 }
5324
5325 // The best way of building a v2i64 from two i64s is to use VLVGP.
5326 if (VT == MVT::v2i64 && !AllLoads)
5327 return joinDwords(DAG, DL, Elems[0], Elems[1]);
5328
5329 // Use a 64-bit merge high to combine two doubles.
5330 if (VT == MVT::v2f64 && !AllLoads)
5331 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5332
5333 // Build v4f32 values directly from the FPRs:
5334 //
5336  //   <Axxx> <Bxxx> <Cxxx> <Dxxx>
5336  //         V             V         VMRHF
5337  //      <ABxx>        <CDxx>
5338  //            V                    VMRHG
5339  //         <ABCD>
5340 if (VT == MVT::v4f32 && !AllLoads) {
5341 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5342 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
5343 // Avoid unnecessary undefs by reusing the other operand.
5344 if (Op01.isUndef())
5345 Op01 = Op23;
5346 else if (Op23.isUndef())
5347 Op23 = Op01;
5348 // Merging identical replications is a no-op.
5349 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
5350 return Op01;
5351 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
5352 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
5353 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
5354 DL, MVT::v2i64, Op01, Op23);
5355 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5356 }
5357
5358 // Collect the constant terms.
5359 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
5360 SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
5361
5362 unsigned NumConstants = 0;
5363 for (unsigned I = 0; I < NumElements; ++I) {
5364 SDValue Elem = Elems[I];
5365 if (Elem.getOpcode() == ISD::Constant ||
5366 Elem.getOpcode() == ISD::ConstantFP) {
5367 NumConstants += 1;
5368 Constants[I] = Elem;
5369 Done[I] = true;
5370 }
5371 }
5372 // If there was at least one constant, fill in the other elements of
5373 // Constants with undefs to get a full vector constant and use that
5374 // as the starting point.
5375 SDValue Result;
5376 SDValue ReplicatedVal;
5377 if (NumConstants > 0) {
5378 for (unsigned I = 0; I < NumElements; ++I)
5379 if (!Constants[I].getNode())
5380 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
5381 Result = DAG.getBuildVector(VT, DL, Constants);
5382 } else {
5383 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
5384 // avoid a false dependency on any previous contents of the vector
5385 // register.
5386
5387    // Use a VLREP if at least one element is a load. Make sure to replicate
5388    // the load whose value is used by the most elements.
5389 std::map<const SDNode*, unsigned> UseCounts;
5390 SDNode *LoadMaxUses = nullptr;
5391 for (unsigned I = 0; I < NumElements; ++I)
5392 if (isVectorElementLoad(Elems[I])) {
5393 SDNode *Ld = Elems[I].getNode();
5394 UseCounts[Ld]++;
5395 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
5396 LoadMaxUses = Ld;
5397 }
5398 if (LoadMaxUses != nullptr) {
5399 ReplicatedVal = SDValue(LoadMaxUses, 0);
5400 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
5401 } else {
5402 // Try to use VLVGP.
5403 unsigned I1 = NumElements / 2 - 1;
5404 unsigned I2 = NumElements - 1;
5405 bool Def1 = !Elems[I1].isUndef();
5406 bool Def2 = !Elems[I2].isUndef();
5407 if (Def1 || Def2) {
5408 SDValue Elem1 = Elems[Def1 ? I1 : I2];
5409 SDValue Elem2 = Elems[Def2 ? I2 : I1];
5410 Result = DAG.getNode(ISD::BITCAST, DL, VT,
5411 joinDwords(DAG, DL, Elem1, Elem2));
5412 Done[I1] = true;
5413 Done[I2] = true;
5414 } else
5415 Result = DAG.getUNDEF(VT);
5416 }
5417 }
5418
5419 // Use VLVGx to insert the other elements.
5420 for (unsigned I = 0; I < NumElements; ++I)
5421 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
5422 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
5423 DAG.getConstant(I, DL, MVT::i32));
5424 return Result;
5425}
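// [Editorial sketch, not part of the listed SystemZISelLowering.cpp] The
// VLVGP starting point above seeds the two elements that occupy the
// low-order bits of each doubleword on this big-endian target, so
// any-extending them to i64 places them correctly for free. The helper
// name below is hypothetical and exists only for illustration.
#include <utility>

static std::pair<unsigned, unsigned> vlvgpSeedIndices(unsigned NumElements) {
  unsigned I1 = NumElements / 2 - 1;  // 1 for v4i32, 3 for v8i16, 7 for v16i8
  unsigned I2 = NumElements - 1;      // 3 for v4i32, 7 for v8i16, 15 for v16i8
  return {I1, I2};
}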
5426
5427SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
5428 SelectionDAG &DAG) const {
5429 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
5430 SDLoc DL(Op);
5431 EVT VT = Op.getValueType();
5432
5433 if (BVN->isConstant()) {
5434 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
5435 return Op;
5436
5437 // Fall back to loading it from memory.
5438 return SDValue();
5439 }
5440
5441 // See if we should use shuffles to construct the vector from other vectors.
5442 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
5443 return Res;
5444
5445 // Detect SCALAR_TO_VECTOR conversions.
5446 if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
5447 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
5448
5449 // Otherwise use buildVector to build the vector up from GPRs.
5450 unsigned NumElements = Op.getNumOperands();
5451 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
5452 for (unsigned I = 0; I < NumElements; ++I)
5453 Ops[I] = Op.getOperand(I);
5454 return buildVector(DAG, DL, VT, Ops);
5455}
5456
5457SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
5458 SelectionDAG &DAG) const {
5459 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
5460 SDLoc DL(Op);
5461 EVT VT = Op.getValueType();
5462 unsigned NumElements = VT.getVectorNumElements();
5463
5464 if (VSN->isSplat()) {
5465 SDValue Op0 = Op.getOperand(0);
5466 unsigned Index = VSN->getSplatIndex();
5467    assert(Index < VT.getVectorNumElements() &&
5468           "Splat index should be defined and in first operand");
5469 // See whether the value we're splatting is directly available as a scalar.
5470 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5471 Op0.getOpcode() == ISD::BUILD_VECTOR)
5472 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
5473 // Otherwise keep it as a vector-to-vector operation.
5474 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
5475 DAG.getTargetConstant(Index, DL, MVT::i32));
5476 }
5477
5478 GeneralShuffle GS(VT);
5479 for (unsigned I = 0; I < NumElements; ++I) {
5480 int Elt = VSN->getMaskElt(I);
5481 if (Elt < 0)
5482 GS.addUndef();
5483 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
5484 unsigned(Elt) % NumElements))
5485 return SDValue();
5486 }
5487 return GS.getNode(DAG, SDLoc(VSN));
5488}
5489
5490SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
5491 SelectionDAG &DAG) const {
5492 SDLoc DL(Op);
5493 // Just insert the scalar into element 0 of an undefined vector.
5494 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
5495 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
5496 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
5497}
5498
5499SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
5500 SelectionDAG &DAG) const {
5501 // Handle insertions of floating-point values.
5502 SDLoc DL(Op);
5503 SDValue Op0 = Op.getOperand(0);
5504 SDValue Op1 = Op.getOperand(1);
5505 SDValue Op2 = Op.getOperand(2);
5506 EVT VT = Op.getValueType();
5507
5508 // Insertions into constant indices of a v2f64 can be done using VPDI.
5509 // However, if the inserted value is a bitcast or a constant then it's
5510 // better to use GPRs, as below.
5511 if (VT == MVT::v2f64 &&
5512 Op1.getOpcode() != ISD::BITCAST &&
5513 Op1.getOpcode() != ISD::ConstantFP &&
5514 Op2.getOpcode() == ISD::Constant) {
5515 uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
5516 unsigned Mask = VT.getVectorNumElements() - 1;
5517 if (Index <= Mask)
5518 return Op;
5519 }
5520
5521 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
5522 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
5523 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
5524 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
5525 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
5526 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
5527 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5528}
5529
5530SDValue
5531SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
5532 SelectionDAG &DAG) const {
5533 // Handle extractions of floating-point values.
5534 SDLoc DL(Op);
5535 SDValue Op0 = Op.getOperand(0);
5536 SDValue Op1 = Op.getOperand(1);
5537 EVT VT = Op.getValueType();
5538 EVT VecVT = Op0.getValueType();
5539
5540 // Extractions of constant indices can be done directly.
5541 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
5542 uint64_t Index = CIndexN->getZExtValue();
5543 unsigned Mask = VecVT.getVectorNumElements() - 1;
5544 if (Index <= Mask)
5545 return Op;
5546 }
5547
5548 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
5549 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
5550 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
5551 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
5552 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
5553 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5554}
5555
5556SDValue SystemZTargetLowering::
5557lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5558 SDValue PackedOp = Op.getOperand(0);
5559 EVT OutVT = Op.getValueType();
5560 EVT InVT = PackedOp.getValueType();
5561 unsigned ToBits = OutVT.getScalarSizeInBits();
5562 unsigned FromBits = InVT.getScalarSizeInBits();
5563 do {
5564 FromBits *= 2;
5565 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
5566 SystemZ::VectorBits / FromBits);
5567 PackedOp =
5568 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
5569 } while (FromBits != ToBits);
5570 return PackedOp;
5571}
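// [Editorial sketch, not part of the listed SystemZISelLowering.cpp] Each
// UNPACK_HIGH in the loop above doubles the element width, so widening
// from FromBits to ToBits takes log2(ToBits / FromBits) unpack steps.
// The helper name is hypothetical and exists only for illustration.
static unsigned numUnpackSteps(unsigned FromBits, unsigned ToBits) {
  unsigned Steps = 0;
  while (FromBits != ToBits) {
    FromBits *= 2;  // mirrors one UNPACK_HIGH
    ++Steps;
  }
  return Steps;     // numUnpackSteps(8, 32) == 2, i.e. v16i8 -> v8i16 -> v4i32
}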
5572
5573// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
5574SDValue SystemZTargetLowering::
5575lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5576 SDValue PackedOp = Op.getOperand(0);
5577 SDLoc DL(Op);
5578 EVT OutVT = Op.getValueType();
5579 EVT InVT = PackedOp.getValueType();
5580 unsigned InNumElts = InVT.getVectorNumElements();
5581 unsigned OutNumElts = OutVT.getVectorNumElements();
5582 unsigned NumInPerOut = InNumElts / OutNumElts;
5583
5584 SDValue ZeroVec =
5585 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
5586
5587 SmallVector<int, 16> Mask(InNumElts);
5588 unsigned ZeroVecElt = InNumElts;
5589 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
5590 unsigned MaskElt = PackedElt * NumInPerOut;
5591 unsigned End = MaskElt + NumInPerOut - 1;
5592 for (; MaskElt < End; MaskElt++)
5593 Mask[MaskElt] = ZeroVecElt++;
5594 Mask[MaskElt] = PackedElt;
5595 }
5596 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
5597 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
5598}
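// [Editorial sketch, not part of the listed SystemZISelLowering.cpp] A
// worked example of the shuffle-mask construction above, assuming the
// in-register zero extension of v4i32 to v2i64 (InNumElts = 4,
// OutNumElts = 2, NumInPerOut = 2). The helper name is hypothetical.
#include <array>

static std::array<int, 4> zextInRegMaskV4I32ToV2I64() {
  constexpr unsigned InNumElts = 4, OutNumElts = 2;
  constexpr unsigned NumInPerOut = InNumElts / OutNumElts;
  std::array<int, InNumElts> Mask{};
  unsigned ZeroVecElt = InNumElts;        // first lane of the zero vector
  for (unsigned PackedElt = 0; PackedElt < OutNumElts; ++PackedElt) {
    unsigned MaskElt = PackedElt * NumInPerOut;
    unsigned End = MaskElt + NumInPerOut - 1;
    for (; MaskElt < End; ++MaskElt)
      Mask[MaskElt] = ZeroVecElt++;       // high (big-endian) lanes take zeros
    Mask[MaskElt] = PackedElt;            // low lane keeps the packed element
  }
  return Mask;                            // {4, 0, 5, 1}
}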
5599
5600SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
5601 unsigned ByScalar) const {
5602 // Look for cases where a vector shift can use the *_BY_SCALAR form.
5603 SDValue Op0 = Op.getOperand(0);
5604 SDValue Op1 = Op.getOperand(1);
5605 SDLoc DL(Op);
5606 EVT VT = Op.getValueType();
5607 unsigned ElemBitSize = VT.getScalarSizeInBits();
5608
5609 // See whether the shift vector is a splat represented as BUILD_VECTOR.
5610 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
5611 APInt SplatBits, SplatUndef;
5612 unsigned SplatBitSize;
5613 bool HasAnyUndefs;
5614 // Check for constant splats. Use ElemBitSize as the minimum element
5615 // width and reject splats that need wider elements.
5616 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
5617 ElemBitSize, true) &&
5618 SplatBitSize == ElemBitSize) {
5619 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
5620 DL, MVT::i32);
5621 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5622 }
5623 // Check for variable splats.
5624 BitVector UndefElements;
5625 SDValue Splat = BVN->getSplatValue(&UndefElements);
5626 if (Splat) {
5627 // Since i32 is the smallest legal type, we either need a no-op
5628 // or a truncation.
5629 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
5630 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5631 }
5632 }
5633
5634 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
5635 // and the shift amount is directly available in a GPR.
5636 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
5637 if (VSN->isSplat()) {
5638 SDValue VSNOp0 = VSN->getOperand(0);
5639 unsigned Index = VSN->getSplatIndex();
5640      assert(Index < VT.getVectorNumElements() &&
5641             "Splat index should be defined and in first operand");
5642 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5643 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
5644 // Since i32 is the smallest legal type, we either need a no-op
5645 // or a truncation.
5646 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
5647 VSNOp0.getOperand(Index));
5648 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5649 }
5650 }
5651 }
5652
5653 // Otherwise just treat the current form as legal.
5654 return Op;
5655}
5656
5657SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
5658 SelectionDAG &DAG) const {
5659 SDLoc DL(Op);
5660 MVT ResultVT = Op.getSimpleValueType();
5661 SDValue Arg = Op.getOperand(0);
5662 auto CNode = cast<ConstantSDNode>(Op.getOperand(1));
5663 unsigned Check = CNode->getZExtValue();
5664
5665 unsigned TDCMask = 0;
5666 if (Check & fcSNan)
5667 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
5668 if (Check & fcQNan)
5669 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
5670 if (Check & fcPosInf)
5671 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
5672 if (Check & fcNegInf)
5673 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
5674 if (Check & fcPosNormal)
5675 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
5676 if (Check & fcNegNormal)
5677 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
5678 if (Check & fcPosSubnormal)
5679 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
5680 if (Check & fcNegSubnormal)
5681 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
5682 if (Check & fcPosZero)
5683 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
5684 if (Check & fcNegZero)
5685 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
5686 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
5687
5688 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
5689 return getCCResult(DAG, Intr);
5690}
5691
5692SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
5693 SelectionDAG &DAG) const {
5694 switch (Op.getOpcode()) {
5695 case ISD::FRAMEADDR:
5696 return lowerFRAMEADDR(Op, DAG);
5697 case ISD::RETURNADDR:
5698 return lowerRETURNADDR(Op, DAG);
5699 case ISD::BR_CC:
5700 return lowerBR_CC(Op, DAG);
5701 case ISD::SELECT_CC:
5702 return lowerSELECT_CC(Op, DAG);
5703 case ISD::SETCC:
5704 return lowerSETCC(Op, DAG);
5705 case ISD::STRICT_FSETCC:
5706 return lowerSTRICT_FSETCC(Op, DAG, false);
5707 case ISD::STRICT_FSETCCS:
5708 return lowerSTRICT_FSETCC(Op, DAG, true);
5709 case ISD::GlobalAddress:
5710 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
5711 case ISD::GlobalTLSAddress:
5712 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
5713 case ISD::BlockAddress:
5714 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
5715 case ISD::JumpTable:
5716 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
5717 case ISD::ConstantPool:
5718 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
5719 case ISD::BITCAST:
5720 return lowerBITCAST(Op, DAG);
5721 case ISD::VASTART:
5722 return lowerVASTART(Op, DAG);
5723 case ISD::VACOPY:
5724 return lowerVACOPY(Op, DAG);
5725 case ISD::DYNAMIC_STACKALLOC:
5726 return lowerDYNAMIC_STACKALLOC(Op, DAG);
5727 case ISD::GET_DYNAMIC_AREA_OFFSET:
5728 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
5729 case ISD::SMUL_LOHI:
5730 return lowerSMUL_LOHI(Op, DAG);
5731 case ISD::UMUL_LOHI:
5732 return lowerUMUL_LOHI(Op, DAG);
5733 case ISD::SDIVREM:
5734 return lowerSDIVREM(Op, DAG);
5735 case ISD::UDIVREM:
5736 return lowerUDIVREM(Op, DAG);
5737 case ISD::SADDO:
5738 case ISD::SSUBO:
5739 case ISD::UADDO:
5740 case ISD::USUBO:
5741 return lowerXALUO(Op, DAG);
5742 case ISD::ADDCARRY:
5743 case ISD::SUBCARRY:
5744 return lowerADDSUBCARRY(Op, DAG);
5745 case ISD::OR:
5746 return lowerOR(Op, DAG);
5747 case ISD::CTPOP:
5748 return lowerCTPOP(Op, DAG);
5749 case ISD::ATOMIC_FENCE:
5750 return lowerATOMIC_FENCE(Op, DAG);
5751 case ISD::ATOMIC_SWAP:
5752 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
5753 case ISD::ATOMIC_STORE:
5754 return lowerATOMIC_STORE(Op, DAG);
5755 case ISD::ATOMIC_LOAD:
5756 return lowerATOMIC_LOAD(Op, DAG);
5757 case ISD::ATOMIC_LOAD_ADD:
5758 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
5759 case ISD::ATOMIC_LOAD_SUB:
5760 return lowerATOMIC_LOAD_SUB(Op, DAG);
5761 case ISD::ATOMIC_LOAD_AND:
5762 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
5763 case ISD::ATOMIC_LOAD_OR:
5764 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
5765 case ISD::ATOMIC_LOAD_XOR:
5766 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
5767 case ISD::ATOMIC_LOAD_NAND:
5768 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
5769 case ISD::ATOMIC_LOAD_MIN:
5770 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
5771 case ISD::ATOMIC_LOAD_MAX:
5772 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
5773 case ISD::ATOMIC_LOAD_UMIN:
5774 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
5775 case ISD::ATOMIC_LOAD_UMAX:
5776 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
5777 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
5778 return lowerATOMIC_CMP_SWAP(Op, DAG);
5779 case ISD::STACKSAVE:
5780 return lowerSTACKSAVE(Op, DAG);
5781 case ISD::STACKRESTORE:
5782 return lowerSTACKRESTORE(Op, DAG);
5783 case ISD::PREFETCH:
5784 return lowerPREFETCH(Op, DAG);
5785 case ISD::INTRINSIC_W_CHAIN:
5786 return lowerINTRINSIC_W_CHAIN(Op, DAG);
5787 case ISD::INTRINSIC_WO_CHAIN:
5788 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
5789 case ISD::BUILD_VECTOR:
5790 return lowerBUILD_VECTOR(Op, DAG);
5791 case ISD::VECTOR_SHUFFLE:
5792 return lowerVECTOR_SHUFFLE(Op, DAG);
5793 case ISD::SCALAR_TO_VECTOR:
5794 return lowerSCALAR_TO_VECTOR(Op, DAG);
5795 case ISD::INSERT_VECTOR_ELT:
5796 return lowerINSERT_VECTOR_ELT(Op, DAG);
5797 case ISD::EXTRACT_VECTOR_ELT:
5798 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
5799 case ISD::SIGN_EXTEND_VECTOR_INREG:
5800 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
5801 case ISD::ZERO_EXTEND_VECTOR_INREG:
5802 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
5803 case ISD::SHL:
5804 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
5805 case ISD::SRL:
5806 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
5807 case ISD::SRA:
5808 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
5809 case ISD::IS_FPCLASS:
5810 return lowerIS_FPCLASS(Op, DAG);
5811 case ISD::GET_ROUNDING:
5812 return lowerGET_ROUNDING(Op, DAG);
5813 default:
5814    llvm_unreachable("Unexpected node to lower");
5815 }
5816}
5817
5818// Lower operations with invalid operand or result types (currently used
5819// only for 128-bit integer types).
5820void
5821SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
5822 SmallVectorImpl<SDValue> &Results,
5823 SelectionDAG &DAG) const {
5824 switch (N->getOpcode()) {
5825 case ISD::ATOMIC_LOAD: {
5826 SDLoc DL(N);
5827 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
5828 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
5829 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5830 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
5831 DL, Tys, Ops, MVT::i128, MMO);
5832 Results.push_back(lowerGR128ToI128(DAG, Res));
5833 Results.push_back(Res.getValue(1));
5834 break;
5835 }
5836 case ISD::ATOMIC_STORE: {
5837 SDLoc DL(N);
5838 SDVTList Tys = DAG.getVTList(MVT::Other);
5839 SDValue Ops[] = { N->getOperand(0),
5840 lowerI128ToGR128(DAG, N->getOperand(2)),
5841 N->getOperand(1) };
5842 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5843 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
5844 DL, Tys, Ops, MVT::i128, MMO);
5845 // We have to enforce sequential consistency by performing a
5846 // serialization operation after the store.
5847 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
5848 AtomicOrdering::SequentiallyConsistent)
5849 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
5850 MVT::Other, Res), 0);
5851 Results.push_back(Res);
5852 break;
5853 }
5854 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
5855 SDLoc DL(N);
5856 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
5857 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
5858 lowerI128ToGR128(DAG, N->getOperand(2)),
5859 lowerI128ToGR128(DAG, N->getOperand(3)) };
5860 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5861 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
5862 DL, Tys, Ops, MVT::i128, MMO);
5863 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
5864 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
5865 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
5866 Results.push_back(lowerGR128ToI128(DAG, Res));
5867 Results.push_back(Success);
5868 Results.push_back(Res.getValue(2));
5869 break;
5870 }
5871 case ISD::BITCAST: {
5872 SDValue Src = N->getOperand(0);
5873 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
5874 !useSoftFloat()) {
5875 SDLoc DL(N);
5876 SDValue Lo, Hi;
5877 if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) {
5878 SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src);
5879 Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
5880 DAG.getConstant(1, DL, MVT::i32));
5881 Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
5882 DAG.getConstant(0, DL, MVT::i32));
5883 } else {
5884      assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass &&
5885             "Unrecognized register class for f128.");
5886 SDValue LoFP = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
5887 DL, MVT::f64, Src);
5888 SDValue HiFP = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
5889 DL, MVT::f64, Src);
5890 Lo = DAG.getNode(ISD::BITCAST, DL, MVT::i64, LoFP);
5891 Hi = DAG.getNode(ISD::BITCAST, DL, MVT::i64, HiFP);
5892 }
5893 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi));
5894 }
5895 break;
5896 }
5897 default:
5898    llvm_unreachable("Unexpected node to lower");
5899 }
5900}
5901
5902void
5903SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
5904 SmallVectorImpl<SDValue> &Results,
5905 SelectionDAG &DAG) const {
5906 return LowerOperationWrapper(N, Results, DAG);
5907}
5908
5909const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
5910#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
5911 switch ((SystemZISD::NodeType)Opcode) {
5912 case SystemZISD::FIRST_NUMBER: break;
5913 OPCODE(RET_FLAG);
5914 OPCODE(CALL);
5915 OPCODE(SIBCALL);
5916 OPCODE(TLS_GDCALL);
5917 OPCODE(TLS_LDCALL);
5918 OPCODE(PCREL_WRAPPER);
5919 OPCODE(PCREL_OFFSET);
5920 OPCODE(ICMP);
5921 OPCODE(FCMP);
5922 OPCODE(STRICT_FCMP);
5923 OPCODE(STRICT_FCMPS);
5924 OPCODE(TM);
5925 OPCODE(BR_CCMASK);
5926 OPCODE(SELECT_CCMASK);
5927 OPCODE(ADJDYNALLOC);
5928 OPCODE(PROBED_ALLOCA);
5929 OPCODE(POPCNT);
5930 OPCODE(SMUL_LOHI);
5931 OPCODE(UMUL_LOHI);
5932 OPCODE(SDIVREM);
5933 OPCODE(UDIVREM);
5934 OPCODE(SADDO);
5935 OPCODE(SSUBO);
5936 OPCODE(UADDO);
5937 OPCODE(USUBO);
5938 OPCODE(ADDCARRY);
5939 OPCODE(SUBCARRY);
5940 OPCODE(GET_CCMASK);
5941 OPCODE(MVC);
5942 OPCODE(NC);
5943 OPCODE(OC);
5944 OPCODE(XC);
5945 OPCODE(CLC);
5946 OPCODE(MEMSET_MVC);
5947 OPCODE(STPCPY);
5948 OPCODE(STRCMP);
5949 OPCODE(SEARCH_STRING);
5950 OPCODE(IPM);
5951 OPCODE(TBEGIN);
5952 OPCODE(TBEGIN_NOFLOAT);
5953 OPCODE(TEND);
5954 OPCODE(BYTE_MASK);
5955 OPCODE(ROTATE_MASK);
5956 OPCODE(REPLICATE);
5957 OPCODE(JOIN_DWORDS);
5958 OPCODE(SPLAT);
5959 OPCODE(MERGE_HIGH);
5960 OPCODE(MERGE_LOW);
5961 OPCODE(SHL_DOUBLE);
5962 OPCODE(PERMUTE_DWORDS);
5963 OPCODE(PERMUTE);
5964 OPCODE(PACK);
5965 OPCODE(PACKS_CC);
5966 OPCODE(PACKLS_CC);
5967 OPCODE(UNPACK_HIGH);
5968 OPCODE(UNPACKL_HIGH);
5969 OPCODE(UNPACK_LOW);
5970 OPCODE(UNPACKL_LOW);
5971 OPCODE(VSHL_BY_SCALAR);
5972 OPCODE(VSRL_BY_SCALAR);
5973 OPCODE(VSRA_BY_SCALAR);
5974 OPCODE(VSUM);
5975 OPCODE(VICMPE);
5976 OPCODE(VICMPH);
5977 OPCODE(VICMPHL);
5978 OPCODE(VICMPES);
5979 OPCODE(VICMPHS);
5980 OPCODE(VICMPHLS);
5981 OPCODE(VFCMPE);
5982 OPCODE(STRICT_VFCMPE);
5983 OPCODE(STRICT_VFCMPES);
5984 OPCODE(VFCMPH);
5985 OPCODE(STRICT_VFCMPH);
5986 OPCODE(STRICT_VFCMPHS);
5987 OPCODE(VFCMPHE);
5988 OPCODE(STRICT_VFCMPHE);
5989 OPCODE(STRICT_VFCMPHES);
5990 OPCODE(VFCMPES);
5991 OPCODE(VFCMPHS);
5992 OPCODE(VFCMPHES);
5993 OPCODE(VFTCI);
5994 OPCODE(VEXTEND);
5995 OPCODE(STRICT_VEXTEND);
5996 OPCODE(VROUND);
5997 OPCODE(STRICT_VROUND);
5998 OPCODE(VTM);
5999 OPCODE(VFAE_CC);
6000 OPCODE(VFAEZ_CC);
6001 OPCODE(VFEE_CC);
6002 OPCODE(VFEEZ_CC);
6003 OPCODE(VFENE_CC);
6004 OPCODE(VFENEZ_CC);
6005 OPCODE(VISTR_CC);
6006 OPCODE(VSTRC_CC);
6007 OPCODE(VSTRCZ_CC);
6008 OPCODE(VSTRS_CC);
6009 OPCODE(VSTRSZ_CC);
6010 OPCODE(TDC);
6011 OPCODE(ATOMIC_SWAPW);
6012 OPCODE(ATOMIC_LOADW_ADD);
6013 OPCODE(ATOMIC_LOADW_SUB);
6014 OPCODE(ATOMIC_LOADW_AND);
6015 OPCODE(ATOMIC_LOADW_OR);
6016 OPCODE(ATOMIC_LOADW_XOR);
6017 OPCODE(ATOMIC_LOADW_NAND);
6018 OPCODE(ATOMIC_LOADW_MIN);
6019 OPCODE(ATOMIC_LOADW_MAX);
6020 OPCODE(ATOMIC_LOADW_UMIN);
6021 OPCODE(ATOMIC_LOADW_UMAX);
6022 OPCODE(ATOMIC_CMP_SWAPW);
6023 OPCODE(ATOMIC_CMP_SWAP);
6024 OPCODE(ATOMIC_LOAD_128);
6025 OPCODE(ATOMIC_STORE_128);
6026 OPCODE(ATOMIC_CMP_SWAP_128);
6027 OPCODE(LRV);
6028 OPCODE(STRV);
6029 OPCODE(VLER);
6030 OPCODE(VSTER);
6031 OPCODE(PREFETCH);
6032 }
6033 return nullptr;
6034#undef OPCODE
6035}
6036
6037// Return true if VT is a vector whose elements are a whole number of bytes
6038// in width. Also check for presence of vector support.
6039bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
6040 if (!Subtarget.hasVector())
6041 return false;
6042
6043 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
6044}
6045
6046// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
6047// producing a result of type ResVT. Op is a possibly bitcast version
6048// of the input vector and Index is the index (based on type VecVT) that
6049// should be extracted. Return the new extraction if a simplification
6050// was possible or if Force is true.
6051SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
6052 EVT VecVT, SDValue Op,
6053 unsigned Index,
6054 DAGCombinerInfo &DCI,
6055 bool Force) const {
6056 SelectionDAG &DAG = DCI.DAG;
6057
6058 // The number of bytes being extracted.
6059 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6060
6061 for (;;) {
6062 unsigned Opcode = Op.getOpcode();
6063 if (Opcode == ISD::BITCAST)
6064 // Look through bitcasts.
6065 Op = Op.getOperand(0);
6066 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
6067 canTreatAsByteVector(Op.getValueType())) {
6068 // Get a VPERM-like permute mask and see whether the bytes covered
6069 // by the extracted element are a contiguous sequence from one
6070 // source operand.
6071 SmallVector<int, SystemZ::VectorBytes> Bytes;
6072 if (!getVPermMask(Op, Bytes))
6073 break;
6074 int First;
6075 if (!getShuffleInput(Bytes, Index * BytesPerElement,
6076 BytesPerElement, First))
6077 break;
6078 if (First < 0)
6079 return DAG.getUNDEF(ResVT);
6080 // Make sure the contiguous sequence starts at a multiple of the
6081 // original element size.
6082 unsigned Byte = unsigned(First) % Bytes.size();
6083 if (Byte % BytesPerElement != 0)
6084 break;
6085 // We can get the extracted value directly from an input.
6086 Index = Byte / BytesPerElement;
6087 Op = Op.getOperand(unsigned(First) / Bytes.size());
6088 Force = true;
6089 } else if (Opcode == ISD::BUILD_VECTOR &&
6090 canTreatAsByteVector(Op.getValueType())) {
6091 // We can only optimize this case if the BUILD_VECTOR elements are
6092 // at least as wide as the extracted value.
6093 EVT OpVT = Op.getValueType();
6094 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6095 if (OpBytesPerElement < BytesPerElement)
6096 break;
6097 // Make sure that the least-significant bit of the extracted value
6098 // is the least significant bit of an input.
6099 unsigned End = (Index + 1) * BytesPerElement;
6100 if (End % OpBytesPerElement != 0)
6101 break;
6102 // We're extracting the low part of one operand of the BUILD_VECTOR.
6103 Op = Op.getOperand(End / OpBytesPerElement - 1);
6104 if (!Op.getValueType().isInteger()) {
6105 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
6106 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
6107 DCI.AddToWorklist(Op.getNode());
6108 }
6109 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
6110 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
6111 if (VT != ResVT) {
6112 DCI.AddToWorklist(Op.getNode());
6113 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
6114 }
6115 return Op;
6116 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6117 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
6118 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6119 canTreatAsByteVector(Op.getValueType()) &&
6120 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
6121 // Make sure that only the unextended bits are significant.
6122 EVT ExtVT = Op.getValueType();
6123 EVT OpVT = Op.getOperand(0).getValueType();
6124 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
6125 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6126 unsigned Byte = Index * BytesPerElement;
6127 unsigned SubByte = Byte % ExtBytesPerElement;
6128 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
6129 if (SubByte < MinSubByte ||
6130 SubByte + BytesPerElement > ExtBytesPerElement)
6131 break;
6132 // Get the byte offset of the unextended element
6133 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
6134 // ...then add the byte offset relative to that element.
6135 Byte += SubByte - MinSubByte;
6136 if (Byte % BytesPerElement != 0)
6137 break;
6138 Op = Op.getOperand(0);
6139 Index = Byte / BytesPerElement;
6140 Force = true;
6141 } else
6142 break;
6143 }
6144 if (Force) {
6145 if (Op.getValueType() != VecVT) {
6146 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
6147 DCI.AddToWorklist(Op.getNode());
6148 }
6149 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
6150 DAG.getConstant(Index, DL, MVT::i32));
6151 }
6152 return SDValue();
6153}
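// [Editorial sketch, not part of the listed SystemZISelLowering.cpp] A
// worked example of the *_EXTEND_VECTOR_INREG byte arithmetic above,
// assuming a v8i16 operand extended in-reg to v4i32 and a 2-byte
// extraction (BytesPerElement = 2, ExtBytesPerElement = 4,
// OpBytesPerElement = 2). The helper name is hypothetical.
static unsigned remapExtendedIndex(unsigned Index, unsigned BytesPerElement,
                                   unsigned ExtBytesPerElement,
                                   unsigned OpBytesPerElement) {
  unsigned Byte = Index * BytesPerElement;
  unsigned SubByte = Byte % ExtBytesPerElement;
  unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
  // Byte offset of the unextended element, plus the offset within it.
  Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
  Byte += SubByte - MinSubByte;
  return Byte / BytesPerElement;  // remapExtendedIndex(1, 2, 4, 2) == 0
}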
6154
6155// Optimize vector operations in scalar value Op on the basis that Op
6156// is truncated to TruncVT.
6157SDValue SystemZTargetLowering::combineTruncateExtract(
6158 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
6159 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
6160 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
6161 // of type TruncVT.
6162 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6163 TruncVT.getSizeInBits() % 8 == 0) {
6164 SDValue Vec = Op.getOperand(0);
6165 EVT VecVT = Vec.getValueType();
6166 if (canTreatAsByteVector(VecVT)) {
6167 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
6168 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6169 unsigned TruncBytes = TruncVT.getStoreSize();
6170 if (BytesPerElement % TruncBytes == 0) {
6171 // Calculate the value of Y' in the above description. We are
6172 // splitting the original elements into Scale equal-sized pieces
6173 // and for truncation purposes want the last (least-significant)
6174 // of these pieces for IndexN. This is easiest to do by calculating
6175 // the start index of the following element and then subtracting 1.
6176 unsigned Scale = BytesPerElement / TruncBytes;
6177 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
6178
6179 // Defer the creation of the bitcast from X to combineExtract,
6180 // which might be able to optimize the extraction.
6181 VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
6182 VecVT.getStoreSize() / TruncBytes);
6183 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
6184 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
6185 }
6186 }
6187 }
6188 }
6189 return SDValue();
6190}
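// [Editorial sketch, not part of the listed SystemZISelLowering.cpp] The
// index remapping above keeps the last (least-significant) TruncBytes-wide
// piece of the original element. The helper name is hypothetical.
static unsigned truncatedExtractIndex(unsigned Index, unsigned BytesPerElement,
                                      unsigned TruncBytes) {
  unsigned Scale = BytesPerElement / TruncBytes;
  // e.g. truncating i32 element 1 of a v4i32 to i8 selects byte element 7
  // of the same vector viewed as v16i8: (1 + 1) * 4 - 1 == 7.
  return (Index + 1) * Scale - 1;
}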
6191
6192SDValue SystemZTargetLowering::combineZERO_EXTEND(
6193 SDNode *N, DAGCombinerInfo &DCI) const {
6194 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
6195 SelectionDAG &DAG = DCI.DAG;
6196 SDValue N0 = N->getOperand(0);
6197 EVT VT = N->getValueType(0);
6198 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
6199 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
6200 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6201 if (TrueOp && FalseOp) {
6202 SDLoc DL(N0);
6203 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
6204 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
6205 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
6206 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
6207 // If N0 has multiple uses, change other uses as well.
6208 if (!N0.hasOneUse()) {
6209 SDValue TruncSelect =
6210 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
6211 DCI.CombineTo(N0.getNode(), TruncSelect);
6212 }
6213 return NewSelect;
6214 }
6215 }
6216 return SDValue();
6217}
6218
6219SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
6220 SDNode *N, DAGCombinerInfo &DCI) const {
6221 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
6222 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
6223 // into (select_cc LHS, RHS, -1, 0, COND)
6224 SelectionDAG &DAG = DCI.DAG;
6225 SDValue N0 = N->getOperand(0);
6226 EVT VT = N->getValueType(0);
6227 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
6228 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
6229 N0 = N0.getOperand(0);
6230 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
6231 SDLoc DL(N0);
6232 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
6233 DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
6234 N0.getOperand(2) };
6235 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
6236 }
6237 return SDValue();
6238}
6239
6240SDValue SystemZTargetLowering::combineSIGN_EXTEND(
6241 SDNode *N, DAGCombinerInfo &DCI) const {
6242 // Convert (sext (ashr (shl X, C1), C2)) to
6243 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
6244 // cheap as narrower ones.
6245 SelectionDAG &DAG = DCI.DAG;
6246 SDValue N0 = N->getOperand(0);
6247 EVT VT = N->getValueType(0);
6248 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
6249 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6250 SDValue Inner = N0.getOperand(0);
6251 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
6252 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
6253 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
6254 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
6255 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
6256 EVT ShiftVT = N0.getOperand(1).getValueType();
6257 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
6258 Inner.getOperand(0));
6259 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
6260 DAG.getConstant(NewShlAmt, SDLoc(Inner),
6261 ShiftVT));
6262 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
6263 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
6264 }
6265 }
6266 }
6267 return SDValue();
6268}
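// [Editorial sketch, not part of the listed SystemZISelLowering.cpp] A
// worked example of the shift widening above: for
// (sext i64 (sra (shl X:i32, 24), 24)), Extra is 64 - 32 = 32, so the
// combine emits (sra (shl (anyext X), 56), 56), which sign-extends the
// same low byte directly in the wider type. The helper name is
// hypothetical.
#include <utility>

static std::pair<unsigned, unsigned>
widenShiftAmounts(unsigned ShlAmt, unsigned SraAmt, unsigned FromBits,
                  unsigned ToBits) {
  unsigned Extra = ToBits - FromBits;
  return {ShlAmt + Extra, SraAmt + Extra};  // {56, 56} for the example above
}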
6269
6270SDValue SystemZTargetLowering::combineMERGE(
6271 SDNode *N, DAGCombinerInfo &DCI) const {
6272 SelectionDAG &DAG = DCI.DAG;
6273 unsigned Opcode = N->getOpcode();
6274 SDValue Op0 = N->getOperand(0);
6275 SDValue Op1 = N->getOperand(1);
6276 if (Op0.getOpcode() == ISD::BITCAST)
6277 Op0 = Op0.getOperand(0);
6278 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
6279 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
6280 // for v4f32.
6281 if (Op1 == N->getOperand(0))
6282 return Op1;
6283 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
6284 EVT VT = Op1.getValueType();
6285 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
6286 if (ElemBytes <= 4) {
6287 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
6288 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
6289 EVT InVT = VT.changeVectorElementTypeToInteger();
6290 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
6291 SystemZ::VectorBytes / ElemBytes / 2);
6292 if (VT != InVT) {
6293 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
6294 DCI.AddToWorklist(Op1.getNode());
6295 }
6296 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
6297 DCI.AddToWorklist(Op.getNode());
6298 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
6299 }
6300 }
6301 return SDValue();
6302}
6303
6304SDValue SystemZTargetLowering::combineLOAD(
6305 SDNode *N, DAGCombinerInfo &DCI) const {
6306 SelectionDAG &DAG = DCI.DAG;
6307 EVT LdVT = N->getValueType(0);
6308 if (LdVT.isVector() || LdVT.isInteger())
6309 return SDValue();
6310 // Transform a scalar load that is REPLICATEd as well as having other
6311 // use(s) to the form where the other use(s) use the first element of the
6312 // REPLICATE instead of the load. Otherwise instruction selection will not
6313  // produce a VLREP. To avoid extracting to a GPR, only do this for
6314  // floating-point loads.
6315
6316 SDValue Replicate;
6317 SmallVector<SDNode*, 8> OtherUses;
6318 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6319 UI != UE; ++UI) {
6320 if (UI->getOpcode() == SystemZISD::REPLICATE) {
6321 if (Replicate)
6322 return SDValue(); // Should never happen
6323 Replicate = SDValue(*UI, 0);
6324 }
6325 else if (UI.getUse().getResNo() == 0)
6326 OtherUses.push_back(*UI);
6327 }
6328 if (!Replicate || OtherUses.empty())
6329 return SDValue();
6330
6331 SDLoc DL(N);
6332 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
6333 Replicate, DAG.getConstant(0, DL, MVT::i32));
6334 // Update uses of the loaded Value while preserving old chains.
6335 for (SDNode *U : OtherUses) {
6336 SmallVector<SDValue, 8> Ops;
6337 for (SDValue Op : U->ops())
6338 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
6339 DAG.UpdateNodeOperands(U, Ops);
6340 }
6341 return SDValue(N, 0);
6342}
6343
6344bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
6345 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
6346 return true;
6347 if (Subtarget.hasVectorEnhancements2())
6348 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64)
6349 return true;
6350 return false;
6351}
6352
6353static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
6354 if (!VT.isVector() || !VT.isSimple() ||
6355 VT.getSizeInBits() != 128 ||
6356 VT.getScalarSizeInBits() % 8 != 0)
6357 return false;
6358
6359 unsigned NumElts = VT.getVectorNumElements();
6360 for (unsigned i = 0; i < NumElts; ++i) {
6361 if (M[i] < 0) continue; // ignore UNDEF indices
6362 if ((unsigned) M[i] != NumElts - 1 - i)
6363 return false;
6364 }
6365
6366 return true;
6367}
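// [Editorial sketch, not part of the listed SystemZISelLowering.cpp]
// Example masks for the check above on a v4i32 (NumElts = 4): only a full
// element reversal is accepted, with undef (-1) entries ignored.
#include <array>

static constexpr std::array<int, 4> FullSwapMask = {3, 2, 1, 0};    // accepted
static constexpr std::array<int, 4> SwapWithUndef = {-1, 2, 1, -1}; // accepted
static constexpr std::array<int, 4> IdentityMask = {0, 1, 2, 3};    // rejected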
6368
6369static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
6370 for (auto *U : StoredVal->uses()) {
6371 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
6372 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
6373 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
6374 continue;
6375 } else if (isa<BuildVectorSDNode>(U)) {
6376 SDValue BuildVector = SDValue(U, 0);
6377 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
6378 isOnlyUsedByStores(BuildVector, DAG))
6379 continue;
6380 }
6381 return false;
6382 }
6383 return true;
6384}
6385
6386SDValue SystemZTargetLowering::combineSTORE(
6387 SDNode *N, DAGCombinerInfo &DCI) const {
6388 SelectionDAG &DAG = DCI.DAG;
6389 auto *SN = cast<StoreSDNode>(N);
6390 auto &Op1 = N->getOperand(1);
6391 EVT MemVT = SN->getMemoryVT();
6392 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
6393 // for the extraction to be done on a vMiN value, so that we can use VSTE.
6394 // If X has wider elements then convert it to:
6395 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
6396 if (MemVT.isInteger() && SN->isTruncatingStore()) {
6397 if (SDValue Value =
6398 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
6399 DCI.AddToWorklist(Value.getNode());
6400
6401 // Rewrite the store with the new form of stored value.
6402 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
6403 SN->getBasePtr(), SN->getMemoryVT(),
6404 SN->getMemOperand());
6405 }
6406 }
6407 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
6408 if (!SN->isTruncatingStore() &&
6409 Op1.getOpcode() == ISD::BSWAP &&
6410 Op1.getNode()->hasOneUse() &&
6411 canLoadStoreByteSwapped(Op1.getValueType())) {
6412
6413 SDValue BSwapOp = Op1.getOperand(0);
6414
6415 if (BSwapOp.getValueType() == MVT::i16)
6416 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
6417
6418 SDValue Ops[] = {
6419 N->getOperand(0), BSwapOp, N->getOperand(2)
6420 };
6421
6422 return
6423 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
6424 Ops, MemVT, SN->getMemOperand());
6425 }
6426 // Combine STORE (element-swap) into VSTER
6427 if (!SN->isTruncatingStore() &&
6428 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
6429 Op1.getNode()->hasOneUse() &&
6430 Subtarget.hasVectorEnhancements2()) {
6431 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
6432 ArrayRef<int> ShuffleMask = SVN->getMask();
6433 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
6434 SDValue Ops[] = {
6435 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
6436 };
6437
6438 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
6439 DAG.getVTList(MVT::Other),
6440 Ops, MemVT, SN->getMemOperand());
6441 }
6442 }
6443
6444 // Replicate a reg or immediate with VREP instead of scalar multiply or
6445 // immediate load. It seems best to do this during the first DAGCombine as
6446  // it is straightforward to handle the zero-extend node in the initial
6447  // DAG, and also not worry about keeping the new MemVT legal (e.g. when
6448 // extracting an i16 element from a v16i8 vector).
6449 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
6450 isOnlyUsedByStores(Op1, DAG)) {
6451 SDValue Word = SDValue();
6452 EVT WordVT;
6453
6454 // Find a replicated immediate and return it if found in Word and its
6455 // type in WordVT.
6456 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
6457 // Some constants are better handled with a scalar store.
6458 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
6459 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
6460 return;
6461 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue()));
6462 if (VCI.isVectorConstantLegal(Subtarget) &&
6463 VCI.Opcode == SystemZISD::REPLICATE) {
6464 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
6465 WordVT = VCI.VecVT.getScalarType();
6466 }
6467 };
6468
6469 // Find a replicated register and return it if found in Word and its type
6470 // in WordVT.
6471 auto FindReplicatedReg = [&](SDValue MulOp) {
6472 EVT MulVT = MulOp.getValueType();
6473 if (MulOp->getOpcode() == ISD::MUL &&
6474 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
6475 // Find a zero extended value and its type.
6476 SDValue LHS = MulOp->getOperand(0);
6477 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
6478 WordVT = LHS->getOperand(0).getValueType();
6479 else if (LHS->getOpcode() == ISD::AssertZext)
6480 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
6481 else
6482 return;
6483 // Find a replicating constant, e.g. 0x00010001.
6484 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
6485 SystemZVectorConstantInfo VCI(
6486 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
6487 if (VCI.isVectorConstantLegal(Subtarget) &&
6488 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
6489 WordVT == VCI.VecVT.getScalarType())
6490 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
6491 }
6492 }
6493 };
6494
6495 if (isa<BuildVectorSDNode>(Op1) &&
6496 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
6497 SDValue SplatVal = Op1->getOperand(0);
6498 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
6499 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
6500 else
6501 FindReplicatedReg(SplatVal);
6502 } else {
6503 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
6504 FindReplicatedImm(C, MemVT.getStoreSize());
6505 else
6506 FindReplicatedReg(Op1);
6507 }
6508
6509 if (Word != SDValue()) {
6510      assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
6511             "Bad type handling");
6512 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
6513 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
6514 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
6515 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
6516 SN->getBasePtr(), SN->getMemOperand());
6517 }
6518 }
6519
6520 return SDValue();
6521}
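// [Editorial sketch, not part of the listed SystemZISelLowering.cpp] The
// replicated-immediate case above turns, for example, a store of the i64
// constant 0x0001000100010001 into a vector replicate of the 16-bit word
// 0x0001. The helper below only mirrors the pattern check and is
// hypothetical.
#include <cstdint>
#include <optional>

static std::optional<uint16_t> replicatedHalfword(uint64_t C) {
  uint16_t Word = static_cast<uint16_t>(C & 0xffff);
  uint64_t Splat = 0;
  for (int I = 0; I < 4; ++I)
    Splat |= uint64_t(Word) << (16 * I);
  return Splat == C ? std::optional<uint16_t>(Word) : std::nullopt;
}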
6522
6523SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
6524 SDNode *N, DAGCombinerInfo &DCI) const {
6525 SelectionDAG &DAG = DCI.DAG;
6526 // Combine element-swap (LOAD) into VLER
6527 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
6528 N->getOperand(0).hasOneUse() &&
6529 Subtarget.hasVectorEnhancements2()) {
6530 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
6531 ArrayRef<int> ShuffleMask = SVN->getMask();
6532 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
6533 SDValue Load = N->getOperand(0);
6534 LoadSDNode *LD = cast<LoadSDNode>(Load);
6535
6536 // Create the element-swapping load.
6537 SDValue Ops[] = {
6538 LD->getChain(), // Chain
6539 LD->getBasePtr() // Ptr
6540 };
6541 SDValue ESLoad =
6542 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
6543 DAG.getVTList(LD->getValueType(0), MVT::Other),
6544 Ops, LD->getMemoryVT(), LD->getMemOperand());
6545
6546 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
6547 // by the load dead.
6548 DCI.CombineTo(N, ESLoad);
6549
6550      // Next, combine the load away: we give it a bogus result value but a real
6551      // chain result. The result value is dead because the shuffle is dead.
6552 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
6553
6554 // Return N so it doesn't get rechecked!
6555 return SDValue(N, 0);
6556 }
6557 }
6558
6559 return SDValue();
6560}
6561
6562SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
6563 SDNode *N, DAGCombinerInfo &DCI) const {
6564 SelectionDAG &DAG = DCI.DAG;
6565
6566 if (!Subtarget.hasVector())
6567 return SDValue();
6568
6569 // Look through bitcasts that retain the number of vector elements.
6570 SDValue Op = N->getOperand(0);
6571 if (Op.getOpcode() == ISD::BITCAST &&
6572 Op.getValueType().isVector() &&
6573 Op.getOperand(0).getValueType().isVector() &&
6574 Op.getValueType().getVectorNumElements() ==
6575 Op.getOperand(0).getValueType().getVectorNumElements())
6576 Op = Op.getOperand(0);
6577
6578 // Pull BSWAP out of a vector extraction.
6579 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
6580 EVT VecVT = Op.getValueType();
6581 EVT EltVT = VecVT.getVectorElementType();
6582 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
6583 Op.getOperand(0), N->getOperand(1));
6584 DCI.AddToWorklist(Op.getNode());
6585 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
6586 if (EltVT != N->getValueType(0)) {
6587 DCI.AddToWorklist(Op.getNode());
6588 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
6589 }
6590 return Op;
6591 }
6592
6593 // Try to simplify a vector extraction.
6594 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
6595 SDValue Op0 = N->getOperand(0);
6596 EVT VecVT = Op0.getValueType();
6597 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
6598 IndexN->getZExtValue(), DCI, false);
6599 }
6600 return SDValue();
6601}
6602
6603SDValue SystemZTargetLowering::combineJOIN_DWORDS(
6604 SDNode *N, DAGCombinerInfo &DCI) const {
6605 SelectionDAG &DAG = DCI.DAG;
6606 // (join_dwords X, X) == (replicate X)
6607 if (N->getOperand(0) == N->getOperand(1))
6608 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
6609 N->getOperand(0));
6610 return SDValue();
6611}
6612
6613static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
6614 SDValue Chain1 = N1->getOperand(0);
6615 SDValue Chain2 = N2->getOperand(0);
6616
6617 // Trivial case: both nodes take the same chain.
6618 if (Chain1 == Chain2)
6619 return Chain1;
6620
6621 // FIXME - we could handle more complex cases via TokenFactor,
6622 // assuming we can verify that this would not create a cycle.
6623 return SDValue();
6624}
6625
6626SDValue SystemZTargetLowering::combineFP_ROUND(
6627 SDNode *N, DAGCombinerInfo &DCI) const {
6628
6629 if (!Subtarget.hasVector())
6630 return SDValue();
6631
6632 // (fpround (extract_vector_elt X 0))
6633 // (fpround (extract_vector_elt X 1)) ->
6634 // (extract_vector_elt (VROUND X) 0)
6635 // (extract_vector_elt (VROUND X) 2)
6636 //
6637 // This is a special case since the target doesn't really support v2f32s.
6638 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
6639 SelectionDAG &DAG = DCI.DAG;
6640 SDValue Op0 = N->getOperand(OpNo);
6641 if (N->getValueType(0) == MVT::f32 &&
6642 Op0.hasOneUse() &&
6643 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6644 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
6645 Op0.getOperand(1).getOpcode() == ISD::Constant &&
6646 cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
6647 SDValue Vec = Op0.getOperand(0);
6648 for (auto *U : Vec->uses()) {
6649 if (U != Op0.getNode() &&
6650 U->hasOneUse() &&
6651 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6652 U->getOperand(0) == Vec &&
6653 U->getOperand(1).getOpcode() == ISD::Constant &&
6654 cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
6655 SDValue OtherRound = SDValue(*U->use_begin(), 0);
6656 if (OtherRound.getOpcode() == N->getOpcode() &&
6657 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
6658 OtherRound.getValueType() == MVT::f32) {
6659 SDValue VRound, Chain;
6660 if (N->isStrictFPOpcode()) {
6661 Chain = MergeInputChains(N, OtherRound.getNode());
6662 if (!Chain)
6663 continue;
6664 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
6665 {MVT::v4f32, MVT::Other}, {Chain, Vec});
6666 Chain = VRound.getValue(1);
6667 } else
6668 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
6669 MVT::v4f32, Vec);
6670 DCI.AddToWorklist(VRound.getNode());
6671 SDValue Extract1 =
6672 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
6673 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
6674 DCI.AddToWorklist(Extract1.getNode());
6675 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
6676 if (Chain)
6677 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
6678 SDValue Extract0 =
6679 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
6680 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
6681 if (Chain)
6682 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
6683 N->getVTList(), Extract0, Chain);
6684 return Extract0;
6685 }
6686 }
6687 }
6688 }
6689 return SDValue();
6690}
6691
6692SDValue SystemZTargetLowering::combineFP_EXTEND(
6693 SDNode *N, DAGCombinerInfo &DCI) const {
6694
6695 if (!Subtarget.hasVector())
6696 return SDValue();
6697
6698 // (fpextend (extract_vector_elt X 0))
6699 // (fpextend (extract_vector_elt X 2)) ->
6700 // (extract_vector_elt (VEXTEND X) 0)
6701 // (extract_vector_elt (VEXTEND X) 1)
6702 //
6703 // This is a special case since the target doesn't really support v2f32s.
6704 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
6705 SelectionDAG &DAG = DCI.DAG;
6706 SDValue Op0 = N->getOperand(OpNo);
6707 if (N->getValueType(0) == MVT::f64 &&
6708 Op0.hasOneUse() &&
6709 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6710 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
6711 Op0.getOperand(1).getOpcode() == ISD::Constant &&
6712 cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
6713 SDValue Vec = Op0.getOperand(0);
6714 for (auto *U : Vec->uses()) {
6715 if (U != Op0.getNode() &&
6716 U->hasOneUse() &&
6717 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6718 U->getOperand(0) == Vec &&
6719 U->getOperand(1).getOpcode() == ISD::Constant &&
6720 cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
6721 SDValue OtherExtend = SDValue(*U->use_begin(), 0);
6722 if (OtherExtend.getOpcode() == N->getOpcode() &&
6723 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
6724 OtherExtend.getValueType() == MVT::f64) {
6725 SDValue VExtend, Chain;
6726 if (N->isStrictFPOpcode()) {
6727 Chain = MergeInputChains(N, OtherExtend.getNode());
6728 if (!Chain)
6729 continue;
6730 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
6731 {MVT::v2f64, MVT::Other}, {Chain, Vec});
6732 Chain = VExtend.getValue(1);
6733 } else
6734 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
6735 MVT::v2f64, Vec);
6736 DCI.AddToWorklist(VExtend.getNode());
6737 SDValue Extract1 =
6738 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
6739 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
6740 DCI.AddToWorklist(Extract1.getNode());
6741 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
6742 if (Chain)
6743 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
6744 SDValue Extract0 =
6745 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
6746 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
6747 if (Chain)
6748 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
6749 N->getVTList(), Extract0, Chain);
6750 return Extract0;
6751 }
6752 }
6753 }
6754 }
6755 return SDValue();
6756}
6757
6758SDValue SystemZTargetLowering::combineINT_TO_FP(
6759 SDNode *N, DAGCombinerInfo &DCI) const {
6760 if (DCI.Level != BeforeLegalizeTypes)
6761 return SDValue();
6762 SelectionDAG &DAG = DCI.DAG;
6763 LLVMContext &Ctx = *DAG.getContext();
6764 unsigned Opcode = N->getOpcode();
6765 EVT OutVT = N->getValueType(0);
6766 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
6767 SDValue Op = N->getOperand(0);
6768 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
6769 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
6770
6771 // Insert an extension before type-legalization to avoid scalarization, e.g.:
6772 // v2f64 = uint_to_fp v2i16
6773 // =>
6774 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
6775 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
6776 OutScalarBits <= 64) {
6777 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
6778 EVT ExtVT = EVT::getVectorVT(
6779 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
6780 unsigned ExtOpcode =
6781 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
6782 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
6783 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
6784 }
6785 return SDValue();
6786}
6787
6788SDValue SystemZTargetLowering::combineBSWAP(
6789 SDNode *N, DAGCombinerInfo &DCI) const {
6790 SelectionDAG &DAG = DCI.DAG;
6791 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
6792 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
6793 N->getOperand(0).hasOneUse() &&
6794 canLoadStoreByteSwapped(N->getValueType(0))) {
6795 SDValue Load = N->getOperand(0);
6796 LoadSDNode *LD = cast<LoadSDNode>(Load);
6797
6798 // Create the byte-swapping load.
6799 SDValue Ops[] = {
6800 LD->getChain(), // Chain
6801 LD->getBasePtr() // Ptr
6802 };
6803 EVT LoadVT = N->getValueType(0);
6804 if (LoadVT == MVT::i16)
6805 LoadVT = MVT::i32;
6806 SDValue BSLoad =
6807 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
6808 DAG.getVTList(LoadVT, MVT::Other),
6809 Ops, LD->getMemoryVT(), LD->getMemOperand());
6810
6811 // If this is an i16 load, insert the truncate.
6812 SDValue ResVal = BSLoad;
6813 if (N->getValueType(0) == MVT::i16)
6814 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
6815
6816 // First, combine the bswap away. This makes the value produced by the
6817 // load dead.
6818 DCI.CombineTo(N, ResVal);
6819
6820 // Next, combine the load away; we give it a bogus result value but a real
6821 // chain result. The result value is dead because the bswap is dead.
6822 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
6823
6824 // Return N so it doesn't get rechecked!
6825 return SDValue(N, 0);
6826 }
6827
6828 // Look through bitcasts that retain the number of vector elements.
6829 SDValue Op = N->getOperand(0);
6830 if (Op.getOpcode() == ISD::BITCAST &&
6831 Op.getValueType().isVector() &&
6832 Op.getOperand(0).getValueType().isVector() &&
6833 Op.getValueType().getVectorNumElements() ==
6834 Op.getOperand(0).getValueType().getVectorNumElements())
6835 Op = Op.getOperand(0);
6836
6837 // Push BSWAP into a vector insertion if at least one side then simplifies.
6838 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
6839 SDValue Vec = Op.getOperand(0);
6840 SDValue Elt = Op.getOperand(1);
6841 SDValue Idx = Op.getOperand(2);
6842
6843 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
6844 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
6845 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
6846 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
6847 (canLoadStoreByteSwapped(N->getValueType(0)) &&
6848 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
6849 EVT VecVT = N->getValueType(0);
6850 EVT EltVT = N->getValueType(0).getVectorElementType();
6851 if (VecVT != Vec.getValueType()) {
6852 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
6853 DCI.AddToWorklist(Vec.getNode());
6854 }
6855 if (EltVT != Elt.getValueType()) {
6856 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
6857 DCI.AddToWorklist(Elt.getNode());
6858 }
6859 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
6860 DCI.AddToWorklist(Vec.getNode());
6861 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
6862 DCI.AddToWorklist(Elt.getNode());
6863 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
6864 Vec, Elt, Idx);
6865 }
6866 }
6867
6868 // Push BSWAP into a vector shuffle if at least one side then simplifies.
6869 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
6870 if (SV && Op.hasOneUse()) {
6871 SDValue Op0 = Op.getOperand(0);
6872 SDValue Op1 = Op.getOperand(1);
6873
6874 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
6875 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
6876 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
6877 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
6878 EVT VecVT = N->getValueType(0);
6879 if (VecVT != Op0.getValueType()) {
6880 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
6881 DCI.AddToWorklist(Op0.getNode());
6882 }
6883 if (VecVT != Op1.getValueType()) {
6884 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
6885 DCI.AddToWorklist(Op1.getNode());
6886 }
6887 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
6888 DCI.AddToWorklist(Op0.getNode());
6889 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
6890 DCI.AddToWorklist(Op1.getNode());
6891 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
6892 }
6893 }
6894
6895 return SDValue();
6896}
6897
6898static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
6899 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
6900 // set by the CCReg instruction using the CCValid / CCMask masks.
6901 // If the CCReg instruction is itself an ICMP testing the condition
6902 // code set by some other instruction, see whether we can directly
6903 // use that condition code.
6904
6905 // Verify that we have an ICMP against some constant.
6906 if (CCValid != SystemZ::CCMASK_ICMP)
6907 return false;
6908 auto *ICmp = CCReg.getNode();
6909 if (ICmp->getOpcode() != SystemZISD::ICMP)
6910 return false;
6911 auto *CompareLHS = ICmp->getOperand(0).getNode();
6912 auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
6913 if (!CompareRHS)
6914 return false;
6915
6916 // Optimize the case where CompareLHS is a SELECT_CCMASK.
6917 if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
6918 // Verify that we have an appropriate mask for an EQ or NE comparison.
6919 bool Invert = false;
6920 if (CCMask == SystemZ::CCMASK_CMP_NE)
6921 Invert = !Invert;
6922 else if (CCMask != SystemZ::CCMASK_CMP_EQ)
6923 return false;
6924
6925 // Verify that the ICMP compares against one of the select values.
6926 auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
6927 if (!TrueVal)
6928 return false;
6929 auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
6930 if (!FalseVal)
6931 return false;
6932 if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
6933 Invert = !Invert;
6934 else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
6935 return false;
6936
6937 // Compute the effective CC mask for the new branch or select.
6938 auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
6939 auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
6940 if (!NewCCValid || !NewCCMask)
6941 return false;
6942 CCValid = NewCCValid->getZExtValue();
6943 CCMask = NewCCMask->getZExtValue();
6944 if (Invert)
6945 CCMask ^= CCValid;
6946
6947 // Return the updated CCReg link.
6948 CCReg = CompareLHS->getOperand(4);
6949 return true;
6950 }
6951
6952 // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
6953 if (CompareLHS->getOpcode() == ISD::SRA) {
6954 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
6955 if (!SRACount || SRACount->getZExtValue() != 30)
6956 return false;
6957 auto *SHL = CompareLHS->getOperand(0).getNode();
6958 if (SHL->getOpcode() != ISD::SHL)
6959 return false;
6960 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
6961 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
6962 return false;
6963 auto *IPM = SHL->getOperand(0).getNode();
6964 if (IPM->getOpcode() != SystemZISD::IPM)
6965 return false;
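// Editorial sketch (not part of the original source): assuming SystemZ::IPM_CC
// is 28, the pattern matched here is SRA(SHL(IPM, 2), 30). IPM places the
// 2-bit condition code in bits 29:28 of its result, so the SHL moves it to
// bits 31:30 and the arithmetic shift right by 30 yields the CC value
// sign-extended from two bits (0, 1, -2, -1), which the ICMP then compares
// against zero.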
6966
6967 // Avoid introducing CC spills (because SRA would clobber CC).
6968 if (!CompareLHS->hasOneUse())
6969 return false;
6970 // Verify that the ICMP compares against zero.
6971 if (CompareRHS->getZExtValue() != 0)
6972 return false;
6973
6974 // Compute the effective CC mask for the new branch or select.
6975 CCMask = SystemZ::reverseCCMask(CCMask);
6976
6977 // Return the updated CCReg link.
6978 CCReg = IPM->getOperand(0);
6979 return true;
6980 }
6981
6982 return false;
6983}
6984
6985SDValue SystemZTargetLowering::combineBR_CCMASK(
6986 SDNode *N, DAGCombinerInfo &DCI) const {
6987 SelectionDAG &DAG = DCI.DAG;
6988
6989 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
6990 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
6991 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
6992 if (!CCValid || !CCMask)
6993 return SDValue();
6994
6995 int CCValidVal = CCValid->getZExtValue();
6996 int CCMaskVal = CCMask->getZExtValue();
6997 SDValue Chain = N->getOperand(0);
6998 SDValue CCReg = N->getOperand(4);
6999
7000 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7001 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
7002 Chain,
7003 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7004 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7005 N->getOperand(3), CCReg);
7006 return SDValue();
7007}
7008
7009SDValue SystemZTargetLowering::combineSELECT_CCMASK(
7010 SDNode *N, DAGCombinerInfo &DCI) const {
7011 SelectionDAG &DAG = DCI.DAG;
7012
7013 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
7014 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
7015 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
7016 if (!CCValid || !CCMask)
7017 return SDValue();
7018
7019 int CCValidVal = CCValid->getZExtValue();
7020 int CCMaskVal = CCMask->getZExtValue();
7021 SDValue CCReg = N->getOperand(4);
7022
7023 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7024 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
7025 N->getOperand(0), N->getOperand(1),
7026 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7027 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7028 CCReg);
7029 return SDValue();
7030}
7031
7032
7033SDValue SystemZTargetLowering::combineGET_CCMASK(
7034 SDNode *N, DAGCombinerInfo &DCI) const {
7035
7036 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
7037 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7038 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7039 if (!CCValid || !CCMask)
7040 return SDValue();
7041 int CCValidVal = CCValid->getZExtValue();
7042 int CCMaskVal = CCMask->getZExtValue();
7043
7044 SDValue Select = N->getOperand(0);
7045 if (Select->getOpcode() == ISD::TRUNCATE)
7046 Select = Select->getOperand(0);
7047 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
7048 return SDValue();
7049
7050 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
7051 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
7052 if (!SelectCCValid || !SelectCCMask)
7053 return SDValue();
7054 int SelectCCValidVal = SelectCCValid->getZExtValue();
7055 int SelectCCMaskVal = SelectCCMask->getZExtValue();
7056
7057 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
7058 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
7059 if (!TrueVal || !FalseVal)
7060 return SDValue();
7061 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
7062 ;
7063 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
7064 SelectCCMaskVal ^= SelectCCValidVal;
7065 else
7066 return SDValue();
7067
7068 if (SelectCCValidVal & ~CCValidVal)
7069 return SDValue();
7070 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
7071 return SDValue();
7072
7073 return Select->getOperand(4);
7074}
7075
7076SDValue SystemZTargetLowering::combineIntDIVREM(
7077 SDNode *N, DAGCombinerInfo &DCI) const {
7078 SelectionDAG &DAG = DCI.DAG;
7079 EVT VT = N->getValueType(0);
7080 // In the case where the divisor is a vector of constants, a cheaper
7081 // sequence of instructions can replace the divide. BuildSDIV is called to
7082 // do this during DAG combining, but it only succeeds when it can build a
7083 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
7084 // since it is not Legal but Custom it can only happen before
7085 // legalization. Therefore we must scalarize this early before Combine
7086 // 1. For widened vectors, this is already the result of type legalization.
7087 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
7088 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
7089 return DAG.UnrollVectorOp(N);
7090 return SDValue();
7091}
7092
7093SDValue SystemZTargetLowering::combineINTRINSIC(
7094 SDNode *N, DAGCombinerInfo &DCI) const {
7095 SelectionDAG &DAG = DCI.DAG;
7096
7097 unsigned Id = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
7098 switch (Id) {
7099 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
7100 // or larger is simply a vector load.
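// Editorial note (assumption, not from the original source): the length
// operand of VLL/VLRL is understood here as a zero-based highest byte index,
// so a constant of 15 or more already covers the full 16-byte vector, which
// is why the intrinsic can be rewritten as a plain load below.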
7101 case Intrinsic::s390_vll:
7102 case Intrinsic::s390_vlrl:
7103 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
7104 if (C->getZExtValue() >= 15)
7105 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
7106 N->getOperand(3), MachinePointerInfo());
7107 break;
7108 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
7109 case Intrinsic::s390_vstl:
7110 case Intrinsic::s390_vstrl:
7111 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
7112 if (C->getZExtValue() >= 15)
7113 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
7114 N->getOperand(4), MachinePointerInfo());
7115 break;
7116 }
7117
7118 return SDValue();
7119}
7120
7121SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
7122 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
7123 return N->getOperand(0);
7124 return N;
7125}
7126
7127SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
7128 DAGCombinerInfo &DCI) const {
7129 switch(N->getOpcode()) {
7130 default: break;
7131 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
7132 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
7133 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
7134 case SystemZISD::MERGE_HIGH:
7135 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
7136 case ISD::LOAD: return combineLOAD(N, DCI);
7137 case ISD::STORE: return combineSTORE(N, DCI);
7138 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
7139 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
7140 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
7141 case ISD::STRICT_FP_ROUND:
7142 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
7143 case ISD::STRICT_FP_EXTEND:
7144 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
7145 case ISD::SINT_TO_FP:
7146 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
7147 case ISD::BSWAP: return combineBSWAP(N, DCI);
7148 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
7149 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
7150 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
7151 case ISD::SDIV:
7152 case ISD::UDIV:
7153 case ISD::SREM:
7154 case ISD::UREM: return combineIntDIVREM(N, DCI);
7155 case ISD::INTRINSIC_W_CHAIN:
7156 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
7157 }
7158
7159 return SDValue();
7160}
7161
7162// Return the demanded elements for the OpNo source operand of Op. DemandedElts
7163// are for Op.
7164static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
7165 unsigned OpNo) {
7166 EVT VT = Op.getValueType();
7167 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
7168 APInt SrcDemE;
7169 unsigned Opcode = Op.getOpcode();
7170 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7171 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7172 switch (Id) {
7173 case Intrinsic::s390_vpksh: // PACKS
7174 case Intrinsic::s390_vpksf:
7175 case Intrinsic::s390_vpksg:
7176 case Intrinsic::s390_vpkshs: // PACKS_CC
7177 case Intrinsic::s390_vpksfs:
7178 case Intrinsic::s390_vpksgs:
7179 case Intrinsic::s390_vpklsh: // PACKLS
7180 case Intrinsic::s390_vpklsf:
7181 case Intrinsic::s390_vpklsg:
7182 case Intrinsic::s390_vpklshs: // PACKLS_CC
7183 case Intrinsic::s390_vpklsfs:
7184 case Intrinsic::s390_vpklsgs:
7185 // VECTOR PACK truncates the elements of two source vectors into one.
7186 SrcDemE = DemandedElts;
7187 if (OpNo == 2)
7188 SrcDemE.lshrInPlace(NumElts / 2);
7189 SrcDemE = SrcDemE.trunc(NumElts / 2);
7190 break;
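// Editorial example (not part of the original source): for a v8i16 pack
// built from two v4i32 sources, result elements 0-3 come from operand 1 and
// elements 4-7 from operand 2, so the demanded-element mask is truncated
// directly for OpNo == 1 and first shifted right by NumElts / 2 for
// OpNo == 2.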
7191 // VECTOR UNPACK extends half the elements of the source vector.
7192 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7193 case Intrinsic::s390_vuphh:
7194 case Intrinsic::s390_vuphf:
7195 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7196 case Intrinsic::s390_vuplhh:
7197 case Intrinsic::s390_vuplhf:
7198 SrcDemE = APInt(NumElts * 2, 0);
7199 SrcDemE.insertBits(DemandedElts, 0);
7200 break;
7201 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7202 case Intrinsic::s390_vuplhw:
7203 case Intrinsic::s390_vuplf:
7204 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7205 case Intrinsic::s390_vupllh:
7206 case Intrinsic::s390_vupllf:
7207 SrcDemE = APInt(NumElts * 2, 0);
7208 SrcDemE.insertBits(DemandedElts, NumElts);
7209 break;
7210 case Intrinsic::s390_vpdi: {
7211 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
7212 SrcDemE = APInt(NumElts, 0);
7213 if (!DemandedElts[OpNo - 1])
7214 break;
7215 unsigned Mask = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
7216 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
7217 // Demand input element 0 or 1, given by the mask bit value.
7218 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
7219 break;
7220 }
7221 case Intrinsic::s390_vsldb: {
7222 // VECTOR SHIFT LEFT DOUBLE BY BYTE
7223 assert(VT == MVT::v16i8 && "Unexpected type.");
7224 unsigned FirstIdx = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
7225 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
7226 unsigned NumSrc0Els = 16 - FirstIdx;
7227 SrcDemE = APInt(NumElts, 0);
7228 if (OpNo == 1) {
7229 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
7230 SrcDemE.insertBits(DemEls, FirstIdx);
7231 } else {
7232 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
7233 SrcDemE.insertBits(DemEls, 0);
7234 }
7235 break;
7236 }
7237 case Intrinsic::s390_vperm:
7238 SrcDemE = APInt(NumElts, 1);
7239 break;
7240 default:
7241 llvm_unreachable("Unhandled intrinsic.");
7242 break;
7243 }
7244 } else {
7245 switch (Opcode) {
7246 case SystemZISD::JOIN_DWORDS:
7247 // Scalar operand.
7248 SrcDemE = APInt(1, 1);
7249 break;
7250 case SystemZISD::SELECT_CCMASK:
7251 SrcDemE = DemandedElts;
7252 break;
7253 default:
7254 llvm_unreachable("Unhandled opcode.");
7255 break;
7256 }
7257 }
7258 return SrcDemE;
7259}
7260
7261static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
7262 const APInt &DemandedElts,
7263 const SelectionDAG &DAG, unsigned Depth,
7264 unsigned OpNo) {
7265 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
7266 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
7267 KnownBits LHSKnown =
7268 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
7269 KnownBits RHSKnown =
7270 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
7271 Known = KnownBits::commonBits(LHSKnown, RHSKnown);
7272}
7273
7274void
7275SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
7276 KnownBits &Known,
7277 const APInt &DemandedElts,
7278 const SelectionDAG &DAG,
7279 unsigned Depth) const {
7280 Known.resetAll();
7281
7282 // Intrinsic CC result is returned in the two low bits.
7283 unsigned tmp0, tmp1; // not used
7284 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
7285 Known.Zero.setBitsFrom(2);
7286 return;
7287 }
7288 EVT VT = Op.getValueType();
7289 if (Op.getResNo() != 0 || VT == MVT::Untyped)
7290 return;
7291 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
7292 "KnownBits does not match VT in bitwidth");
7293 assert ((!VT.isVector() ||
7294 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
7295 "DemandedElts does not match VT number of elements");
7296 unsigned BitWidth = Known.getBitWidth();
7297 unsigned Opcode = Op.getOpcode();
7298 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7299 bool IsLogical = false;
7300 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7301 switch (Id) {
7302 case Intrinsic::s390_vpksh: // PACKS
7303 case Intrinsic::s390_vpksf:
7304 case Intrinsic::s390_vpksg:
7305 case Intrinsic::s390_vpkshs: // PACKS_CC
7306 case Intrinsic::s390_vpksfs:
7307 case Intrinsic::s390_vpksgs:
7308 case Intrinsic::s390_vpklsh: // PACKLS
7309 case Intrinsic::s390_vpklsf:
7310 case Intrinsic::s390_vpklsg:
7311 case Intrinsic::s390_vpklshs: // PACKLS_CC
7312 case Intrinsic::s390_vpklsfs:
7313 case Intrinsic::s390_vpklsgs:
7314 case Intrinsic::s390_vpdi:
7315 case Intrinsic::s390_vsldb:
7316 case Intrinsic::s390_vperm:
7317 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
7318 break;
7319 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7320 case Intrinsic::s390_vuplhh:
7321 case Intrinsic::s390_vuplhf:
7322 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7323 case Intrinsic::s390_vupllh:
7324 case Intrinsic::s390_vupllf:
7325 IsLogical = true;
7326 [[fallthrough]];
7327 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7328 case Intrinsic::s390_vuphh:
7329 case Intrinsic::s390_vuphf:
7330 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7331 case Intrinsic::s390_vuplhw:
7332 case Intrinsic::s390_vuplf: {
7333 SDValue SrcOp = Op.getOperand(1);
7334 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
7335 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
7336 if (IsLogical) {
7337 Known = Known.zext(BitWidth);
7338 } else
7339 Known = Known.sext(BitWidth);
7340 break;
7341 }
7342 default:
7343 break;
7344 }
7345 } else {
7346 switch (Opcode) {
7347 case SystemZISD::JOIN_DWORDS:
7348 case SystemZISD::SELECT_CCMASK:
7349 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
7350 break;
7351 case SystemZISD::REPLICATE: {
7352 SDValue SrcOp = Op.getOperand(0);
7353 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
7354 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
7355 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
7356 break;
7357 }
7358 default:
7359 break;
7360 }
7361 }
7362
7363 // Known has the width of the source operand(s). Adjust if needed to match
7364 // the passed bitwidth.
7365 if (Known.getBitWidth() != BitWidth)
7366 Known = Known.anyextOrTrunc(BitWidth);
7367}
7368
7369static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
7370 const SelectionDAG &DAG, unsigned Depth,
7371 unsigned OpNo) {
7372 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
7373 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
7374 if (LHS == 1) return 1; // Early out.
7375 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
7376 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
7377 if (RHS == 1) return 1; // Early out.
7378 unsigned Common = std::min(LHS, RHS);
7379 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
7380 EVT VT = Op.getValueType();
7381 unsigned VTBits = VT.getScalarSizeInBits();
7382 if (SrcBitWidth > VTBits) { // PACK
7383 unsigned SrcExtraBits = SrcBitWidth - VTBits;
7384 if (Common > SrcExtraBits)
7385 return (Common - SrcExtraBits);
7386 return 1;
7387 }
7388 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
7389 return Common;
7390}
7391
7392unsigned
7393SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
7394 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
7395 unsigned Depth) const {
7396 if (Op.getResNo() != 0)
7397 return 1;
7398 unsigned Opcode = Op.getOpcode();
7399 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7400 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7401 switch (Id) {
7402 case Intrinsic::s390_vpksh: // PACKS
7403 case Intrinsic::s390_vpksf:
7404 case Intrinsic::s390_vpksg:
7405 case Intrinsic::s390_vpkshs: // PACKS_CC
7406 case Intrinsic::s390_vpksfs:
7407 case Intrinsic::s390_vpksgs:
7408 case Intrinsic::s390_vpklsh: // PACKLS
7409 case Intrinsic::s390_vpklsf:
7410 case Intrinsic::s390_vpklsg:
7411 case Intrinsic::s390_vpklshs: // PACKLS_CC
7412 case Intrinsic::s390_vpklsfs:
7413 case Intrinsic::s390_vpklsgs:
7414 case Intrinsic::s390_vpdi:
7415 case Intrinsic::s390_vsldb:
7416 case Intrinsic::s390_vperm:
7417 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
7418 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7419 case Intrinsic::s390_vuphh:
7420 case Intrinsic::s390_vuphf:
7421 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7422 case Intrinsic::s390_vuplhw:
7423 case Intrinsic::s390_vuplf: {
7424 SDValue PackedOp = Op.getOperand(1);
7425 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
7426 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
7427 EVT VT = Op.getValueType();
7428 unsigned VTBits = VT.getScalarSizeInBits();
7429 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
7430 return Tmp;
7431 }
7432 default:
7433 break;
7434 }
7435 } else {
7436 switch (Opcode) {
7437 case SystemZISD::SELECT_CCMASK:
7438 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
7439 default:
7440 break;
7441 }
7442 }
7443
7444 return 1;
7445}
7446
7447unsigned
7448SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
7449 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
7450 unsigned StackAlign = TFI->getStackAlignment();
7451 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
7452 "Unexpected stack alignment");
7453 // The default stack probe size is 4096 if the function has no
7454 // stack-probe-size attribute.
7455 unsigned StackProbeSize =
7456 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
7457 // Round down to the stack alignment.
7458 StackProbeSize &= ~(StackAlign - 1);
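// Editorial example (values assumed, not from the original source): with an
// 8-byte stack alignment, an attribute value of 4100 rounds down to 4096; a
// value smaller than the alignment rounds down to 0, in which case the
// alignment itself is returned below.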
7459 return StackProbeSize ? StackProbeSize : StackAlign;
7460}
7461
7462//===----------------------------------------------------------------------===//
7463// Custom insertion
7464//===----------------------------------------------------------------------===//
7465
7466// Force base value Base into a register before MI. Return the register.
7467static Register forceReg(MachineInstr &MI, MachineOperand &Base,
7468 const SystemZInstrInfo *TII) {
7469 MachineBasicBlock *MBB = MI.getParent();
7470 MachineFunction &MF = *MBB->getParent();
7471 MachineRegisterInfo &MRI = MF.getRegInfo();
7472
7473 if (Base.isReg()) {
7474 // Copy Base into a new virtual register to help register coalescing in
7475 // cases with multiple uses.
7476 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
7477 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
7478 .add(Base);
7479 return Reg;
7480 }
7481
7482 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
7483 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
7484 .add(Base)
7485 .addImm(0)
7486 .addReg(0);
7487 return Reg;
7488}
7489
7490// The CC operand of MI might be missing a kill marker because there
7491// were multiple uses of CC, and ISel didn't know which to mark.
7492// Figure out whether MI should have had a kill marker.
7493static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
7494 // Scan forward through BB for a use/def of CC.
7495 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
7496 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
7497 const MachineInstr& mi = *miI;
7498 if (mi.readsRegister(SystemZ::CC))
7499 return false;
7500 if (mi.definesRegister(SystemZ::CC))
7501 break; // Should have kill-flag - update below.
7502 }
7503
7504 // If we hit the end of the block, check whether CC is live into a
7505 // successor.
7506 if (miI == MBB->end()) {
7507 for (const MachineBasicBlock *Succ : MBB->successors())
7508 if (Succ->isLiveIn(SystemZ::CC))
7509 return false;
7510 }
7511
7512 return true;
7513}
7514
7515// Return true if it is OK for this Select pseudo-opcode to be cascaded
7516// together with other Select pseudo-opcodes into a single basic-block with
7517// a conditional jump around it.
7518static bool isSelectPseudo(MachineInstr &MI) {
7519 switch (MI.getOpcode()) {
7520 case SystemZ::Select32:
7521 case SystemZ::Select64:
7522 case SystemZ::SelectF32:
7523 case SystemZ::SelectF64:
7524 case SystemZ::SelectF128:
7525 case SystemZ::SelectVR32:
7526 case SystemZ::SelectVR64:
7527 case SystemZ::SelectVR128:
7528 return true;
7529
7530 default:
7531 return false;
7532 }
7533}
7534
7535// Helper function, which inserts PHI functions into SinkMBB:
7536// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
7537// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
7538static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
7539 MachineBasicBlock *TrueMBB,
7540 MachineBasicBlock *FalseMBB,
7541 MachineBasicBlock *SinkMBB) {
7542 MachineFunction *MF = TrueMBB->getParent();
7543 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
7544
7545 MachineInstr *FirstMI = Selects.front();
7546 unsigned CCValid = FirstMI->getOperand(3).getImm();
7547 unsigned CCMask = FirstMI->getOperand(4).getImm();
7548
7549 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
7550
7551 // As we are creating the PHIs, we have to be careful if there is more than
7552 // one. Later Selects may reference the results of earlier Selects, but later
7553 // PHIs have to reference the individual true/false inputs from earlier PHIs.
7554 // That also means that PHI construction must work forward from earlier to
7555 // later, and that the code must maintain a mapping from each earlier PHI's
7556 // destination register to the registers that went into that PHI.
7557 DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
7558
7559 for (auto *MI : Selects) {
7560 Register DestReg = MI->getOperand(0).getReg();
7561 Register TrueReg = MI->getOperand(1).getReg();
7562 Register FalseReg = MI->getOperand(2).getReg();
7563
7564 // If this Select we are generating is the opposite condition from
7565 // the jump we generated, then we have to swap the operands for the
7566 // PHI that is going to be generated.
7567 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
7568 std::swap(TrueReg, FalseReg);
7569
7570 if (RegRewriteTable.find(TrueReg) != RegRewriteTable.end())
7571 TrueReg = RegRewriteTable[TrueReg].first;
7572
7573 if (RegRewriteTable.find(FalseReg) != RegRewriteTable.end())
7574 FalseReg = RegRewriteTable[FalseReg].second;
7575
7576 DebugLoc DL = MI->getDebugLoc();
7577 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
7578 .addReg(TrueReg).addMBB(TrueMBB)
7579 .addReg(FalseReg).addMBB(FalseMBB);
7580
7581 // Add this PHI to the rewrite table.
7582 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
7583 }
7584
7585 MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
7586}
7587
7588// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
7589MachineBasicBlock *
7590SystemZTargetLowering::emitSelect(MachineInstr &MI,
7591 MachineBasicBlock *MBB) const {
7592 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
7593 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
7594
7595 unsigned CCValid = MI.getOperand(3).getImm();
7596 unsigned CCMask = MI.getOperand(4).getImm();
7597
7598 // If we have a sequence of Select* pseudo instructions using the
7599 // same condition code value, we want to expand all of them into
7600 // a single pair of basic blocks using the same condition.
7601 SmallVector<MachineInstr*, 8> Selects;
7602 SmallVector<MachineInstr*, 8> DbgValues;
7603 Selects.push_back(&MI);
7604 unsigned Count = 0;
7605 for (MachineInstr &NextMI : llvm::make_range(
7606 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
7607 if (isSelectPseudo(NextMI)) {
7608 assert(NextMI.getOperand(3).getImm() == CCValid &&
7609 "Bad CCValid operands since CC was not redefined.");
7610 if (NextMI.getOperand(4).getImm() == CCMask ||
7611 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
7612 Selects.push_back(&NextMI);
7613 continue;
7614 }
7615 break;
7616 }
7617 if (NextMI.definesRegister(SystemZ::CC) || NextMI.usesCustomInsertionHook())
7618 break;
7619 bool User = false;
7620 for (auto *SelMI : Selects)
7621 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
7622 User = true;
7623 break;
7624 }
7625 if (NextMI.isDebugInstr()) {
7626 if (User) {
7627 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
7628 DbgValues.push_back(&NextMI);
7629 }
7630 } else if (User || ++Count > 20)
7631 break;
7632 }
7633
7634 MachineInstr *LastMI = Selects.back();
7635 bool CCKilled =
7636 (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB));
7637 MachineBasicBlock *StartMBB = MBB;
7638 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
7639 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
7640
7641 // Unless CC was killed in the last Select instruction, mark it as
7642 // live-in to both FalseMBB and JoinMBB.
7643 if (!CCKilled) {
7644 FalseMBB->addLiveIn(SystemZ::CC);
7645 JoinMBB->addLiveIn(SystemZ::CC);
7646 }
7647
7648 // StartMBB:
7649 // BRC CCMask, JoinMBB
7650 // # fallthrough to FalseMBB
7651 MBB = StartMBB;
7652 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
7653 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
7654 MBB->addSuccessor(JoinMBB);
7655 MBB->addSuccessor(FalseMBB);
7656
7657 // FalseMBB:
7658 // # fallthrough to JoinMBB
7659 MBB = FalseMBB;
7660 MBB->addSuccessor(JoinMBB);
7661
7662 // JoinMBB:
7663 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
7664 // ...
7665 MBB = JoinMBB;
7666 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
7667 for (auto *SelMI : Selects)
7668 SelMI->eraseFromParent();
7669
7670 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
7671 for (auto *DbgMI : DbgValues)
7672 MBB->splice(InsertPos, StartMBB, DbgMI);
7673
7674 return JoinMBB;
7675}
7676
7677// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
7678// StoreOpcode is the store to use and Invert says whether the store should
7679// happen when the condition is false rather than true. If a STORE ON
7680// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
7681MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
7682 MachineBasicBlock *MBB,
7683 unsigned StoreOpcode,
7684 unsigned STOCOpcode,
7685 bool Invert) const {
7686 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
7687
7688 Register SrcReg = MI.getOperand(0).getReg();
7689 MachineOperand Base = MI.getOperand(1);
7690 int64_t Disp = MI.getOperand(2).getImm();
7691 Register IndexReg = MI.getOperand(3).getReg();
7692 unsigned CCValid = MI.getOperand(4).getImm();
7693 unsigned CCMask = MI.getOperand(5).getImm();
7694 DebugLoc DL = MI.getDebugLoc();
7695
7696 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
7697
7698 // ISel pattern matching also adds a load memory operand of the same
7699 // address, so take special care to find the storing memory operand.
7700 MachineMemOperand *MMO = nullptr;
7701 for (auto *I : MI.memoperands())
7702 if (I->isStore()) {
7703 MMO = I;
7704 break;
7705 }
7706
7707 // Use STOCOpcode if possible. We could use different store patterns in
7708 // order to avoid matching the index register, but the performance trade-offs
7709 // might be more complicated in that case.
7710 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
7711 if (Invert)
7712 CCMask ^= CCValid;
7713
7714 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
7715 .addReg(SrcReg)
7716 .add(Base)
7717 .addImm(Disp)
7718 .addImm(CCValid)
7719 .addImm(CCMask)
7720 .addMemOperand(MMO);
7721
7722 MI.eraseFromParent();
7723 return MBB;
7724 }
7725
7726 // Get the condition needed to branch around the store.
7727 if (!Invert)
7728 CCMask ^= CCValid;
7729
7730 MachineBasicBlock *StartMBB = MBB;
7731 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
7732 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
7733
7734 // Unless CC was killed in the CondStore instruction, mark it as
7735 // live-in to both FalseMBB and JoinMBB.
7736 if (!MI.killsRegister(SystemZ::CC) && !checkCCKill(MI, JoinMBB)) {
7737 FalseMBB->addLiveIn(SystemZ::CC);
7738 JoinMBB->addLiveIn(SystemZ::CC);
7739 }
7740
7741 // StartMBB:
7742 // BRC CCMask, JoinMBB
7743 // # fallthrough to FalseMBB
7744 MBB = StartMBB;
7745 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7746 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
7747 MBB->addSuccessor(JoinMBB);
7748 MBB->addSuccessor(FalseMBB);
7749
7750 // FalseMBB:
7751 // store %SrcReg, %Disp(%Index,%Base)
7752 // # fallthrough to JoinMBB
7753 MBB = FalseMBB;
7754 BuildMI(MBB, DL, TII->get(StoreOpcode))
7755 .addReg(SrcReg)
7756 .add(Base)
7757 .addImm(Disp)
7758 .addReg(IndexReg)
7759 .addMemOperand(MMO);
7760 MBB->addSuccessor(JoinMBB);
7761
7762 MI.eraseFromParent();
7763 return JoinMBB;
7764}
7765
7766// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
7767// or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that
7768// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
7769// BitSize is the width of the field in bits, or 0 if this is a partword
7770// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
7771// is one of the operands. Invert says whether the field should be
7772// inverted after performing BinOpcode (e.g. for NAND).
7773MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
7774 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
7775 unsigned BitSize, bool Invert) const {
7776 MachineFunction &MF = *MBB->getParent();
7777 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
7778 MachineRegisterInfo &MRI = MF.getRegInfo();
7779 bool IsSubWord = (BitSize < 32);
7780
7781 // Extract the operands. Base can be a register or a frame index.
7782 // Src2 can be a register or immediate.
7783 Register Dest = MI.getOperand(0).getReg();
7784 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
7785 int64_t Disp = MI.getOperand(2).getImm();
7786 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
7787 Register BitShift = IsSubWord ? MI.getOperand(4).getReg() : Register();
7788 Register NegBitShift = IsSubWord ? MI.getOperand(5).getReg() : Register();
7789 DebugLoc DL = MI.getDebugLoc();
7790 if (IsSubWord)
7791 BitSize = MI.getOperand(6).getImm();
7792
7793 // Subword operations use 32-bit registers.
7794 const TargetRegisterClass *RC = (BitSize <= 32 ?
7795 &SystemZ::GR32BitRegClass :
7796 &SystemZ::GR64BitRegClass);
7797 unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
7798 unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
7799
7800 // Get the right opcodes for the displacement.
7801 LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
7802 CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
7803 assert(LOpcode && CSOpcode && "Displacement out of range");
7804
7805 // Create virtual registers for temporary results.
7806 Register OrigVal = MRI.createVirtualRegister(RC);
7807 Register OldVal = MRI.createVirtualRegister(RC);
7808 Register NewVal = (BinOpcode || IsSubWord ?
7809 MRI.createVirtualRegister(RC) : Src2.getReg());
7810 Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
7811 Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
7812
7813 // Insert a basic block for the main loop.
7814 MachineBasicBlock *StartMBB = MBB;
7815 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
7816 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
7817
7818 // StartMBB:
7819 // ...
7820 // %OrigVal = L Disp(%Base)
7821 // # fall through to LoopMBB
7822 MBB = StartMBB;
7823 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
7824 MBB->addSuccessor(LoopMBB);
7825
7826 // LoopMBB:
7827 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
7828 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
7829 // %RotatedNewVal = OP %RotatedOldVal, %Src2
7830 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
7831 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
7832 // JNE LoopMBB
7833 // # fall through to DoneMBB
7834 MBB = LoopMBB;
7835 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
7836 .addReg(OrigVal).addMBB(StartMBB)
7837 .addReg(Dest).addMBB(LoopMBB);
7838 if (IsSubWord)
7839 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
7840 .addReg(OldVal).addReg(BitShift).addImm(0);
7841 if (Invert) {
7842 // Perform the operation normally and then invert every bit of the field.
7843 Register Tmp = MRI.createVirtualRegister(RC);
7844 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
7845 if (BitSize <= 32)
7846 // XILF with the upper BitSize bits set.
7847 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
7848 .addReg(Tmp).addImm(-1U << (32 - BitSize));
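// Editorial example (not part of the original source): for an 8-bit field,
// -1U << (32 - 8) yields the immediate 0xFF000000, so the XILF flips only
// the upper 8 bits, i.e. the rotated field, and leaves the rest of the
// 32-bit word unchanged.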
7849 else {
7850 // Use LCGR and add -1 to the result, which is more compact than
7851 // an XILF, XILH pair.
7852 Register Tmp2 = MRI.createVirtualRegister(RC);
7853 BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
7854 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
7855 .addReg(Tmp2).addImm(-1);
7856 }
7857 } else if (BinOpcode)
7858 // A simple binary operation.
7859 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
7860 .addReg(RotatedOldVal)
7861 .add(Src2);
7862 else if (IsSubWord)
7863 // Use RISBG to rotate Src2 into position and use it to replace the
7864 // field in RotatedOldVal.
7865 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
7866 .addReg(RotatedOldVal).addReg(Src2.getReg())
7867 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
7868 if (IsSubWord)
7869 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
7870 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
7871 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
7872 .addReg(OldVal)
7873 .addReg(NewVal)
7874 .add(Base)
7875 .addImm(Disp);
7876 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7877 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
7878 MBB->addSuccessor(LoopMBB);
7879 MBB->addSuccessor(DoneMBB);
7880
7881 MI.eraseFromParent();
7882 return DoneMBB;
7883}
7884
7885// Implement EmitInstrWithCustomInserter for pseudo
7886// ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
7887// instruction that should be used to compare the current field with the
7888// minimum or maximum value. KeepOldMask is the BRC condition-code mask
7889// for when the current field should be kept. BitSize is the width of
7890// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
7891MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
7892 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
7893 unsigned KeepOldMask, unsigned BitSize) const {
7894 MachineFunction &MF = *MBB->getParent();
7895 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
7896 MachineRegisterInfo &MRI = MF.getRegInfo();
7897 bool IsSubWord = (BitSize < 32);
7898
7899 // Extract the operands. Base can be a register or a frame index.
7900 Register Dest = MI.getOperand(0).getReg();
7901 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
7902 int64_t Disp = MI.getOperand(2).getImm();
7903 Register Src2 = MI.getOperand(3).getReg();
7904 Register BitShift = (IsSubWord ? MI.getOperand(4).getReg() : Register());
7905 Register NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : Register());
7906 DebugLoc DL = MI.getDebugLoc();
7907 if (IsSubWord)
7908 BitSize = MI.getOperand(6).getImm();
7909
7910 // Subword operations use 32-bit registers.
7911 const TargetRegisterClass *RC = (BitSize <= 32 ?
7912 &SystemZ::GR32BitRegClass :
7913 &SystemZ::GR64BitRegClass);
7914 unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
7915 unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
7916
7917 // Get the right opcodes for the displacement.
7918 LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
7919 CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
7920 assert(LOpcode && CSOpcode && "Displacement out of range");
7921
7922 // Create virtual registers for temporary results.
7923 Register OrigVal = MRI.createVirtualRegister(RC);
7924 Register OldVal = MRI.createVirtualRegister(RC);
7925 Register NewVal = MRI.createVirtualRegister(RC);
7926 Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
7927 Register RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
7928 Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
7929
7930 // Insert 3 basic blocks for the loop.
7931 MachineBasicBlock *StartMBB = MBB;
7932 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
7933 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
7934 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
7935 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
7936
7937 // StartMBB:
7938 // ...
7939 // %OrigVal = L Disp(%Base)
7940 // # fall through to LoopMBB
7941 MBB = StartMBB;
7942 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
7943 MBB->addSuccessor(LoopMBB);
7944
7945 // LoopMBB:
7946 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
7947 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
7948 // CompareOpcode %RotatedOldVal, %Src2
7949 // BRC KeepOldMask, UpdateMBB
7950 MBB = LoopMBB;
7951 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
7952 .addReg(OrigVal).addMBB(StartMBB)
7953 .addReg(Dest).addMBB(UpdateMBB);
7954 if (IsSubWord)
7955 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
7956 .addReg(OldVal).addReg(BitShift).addImm(0);
7957 BuildMI(MBB, DL, TII->get(CompareOpcode))
7958 .addReg(RotatedOldVal).addReg(Src2);
7959 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7960 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
7961 MBB->addSuccessor(UpdateMBB);
7962 MBB->addSuccessor(UseAltMBB);
7963
7964 // UseAltMBB:
7965 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
7966 // # fall through to UpdateMBB
7967 MBB = UseAltMBB;
7968 if (IsSubWord)
7969 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
7970 .addReg(RotatedOldVal).addReg(Src2)
7971 .addImm(32).addImm(31 + BitSize).addImm(0);
7972 MBB->addSuccessor(UpdateMBB);
7973
7974 // UpdateMBB:
7975 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
7976 // [ %RotatedAltVal, UseAltMBB ]
7977 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
7978 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
7979 // JNE LoopMBB
7980 // # fall through to DoneMBB
7981 MBB = UpdateMBB;
7982 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
7983 .addReg(RotatedOldVal).addMBB(LoopMBB)
7984 .addReg(RotatedAltVal).addMBB(UseAltMBB);
7985 if (IsSubWord)
7986 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
7987 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
7988 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
7989 .addReg(OldVal)
7990 .addReg(NewVal)
7991 .add(Base)
7992 .addImm(Disp);
7993 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7994 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
7995 MBB->addSuccessor(LoopMBB);
7996 MBB->addSuccessor(DoneMBB);
7997
7998 MI.eraseFromParent();
7999 return DoneMBB;
8000}
8001
8002// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
8003// instruction MI.
8004MachineBasicBlock *
8005SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
8006 MachineBasicBlock *MBB) const {
8007 MachineFunction &MF = *MBB->getParent();
8008 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8009 MachineRegisterInfo &MRI = MF.getRegInfo();
8010
8011 // Extract the operands. Base can be a register or a frame index.
8012 Register Dest = MI.getOperand(0).getReg();
8013 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8014 int64_t Disp = MI.getOperand(2).getImm();
8015 Register CmpVal = MI.getOperand(3).getReg();
8016 Register OrigSwapVal = MI.getOperand(4).getReg();
8017 Register BitShift = MI.getOperand(5).getReg();
8018 Register NegBitShift = MI.getOperand(6).getReg();
8019 int64_t BitSize = MI.getOperand(7).getImm();
8020 DebugLoc DL = MI.getDebugLoc();
8021
8022 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
8023
8024 // Get the right opcodes for the displacement and zero-extension.
8025 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8026 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8027 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
8028 assert(LOpcode && CSOpcode && "Displacement out of range");
8029
8030 // Create virtual registers for temporary results.
8031 Register OrigOldVal = MRI.createVirtualRegister(RC);
8032 Register OldVal = MRI.createVirtualRegister(RC);
8033 Register SwapVal = MRI.createVirtualRegister(RC);
8034 Register StoreVal = MRI.createVirtualRegister(RC);
8035 Register OldValRot = MRI.createVirtualRegister(RC);
8036 Register RetryOldVal = MRI.createVirtualRegister(RC);
8037 Register RetrySwapVal = MRI.createVirtualRegister(RC);
8038
8039 // Insert 2 basic blocks for the loop.
8040 MachineBasicBlock *StartMBB = MBB;
8041 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8042 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8043 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
8044
8045 // StartMBB:
8046 // ...
8047 // %OrigOldVal = L Disp(%Base)
8048 // # fall through to LoopMBB
8049 MBB = StartMBB;
8050 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
8051 .add(Base)
8052 .addImm(Disp)
8053 .addReg(0);
8054 MBB->addSuccessor(LoopMBB);
8055
8056 // LoopMBB:
8057 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
8058 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
8059 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
8060 // ^^ The low BitSize bits contain the field
8061 // of interest.
8062 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
8063 // ^^ Replace the upper 32-BitSize bits of the
8064 // swap value with those that we loaded and rotated.
8065 // %Dest = LL[CH] %OldValRot
8066 // CR %Dest, %CmpVal
8067 // JNE DoneMBB
8068 // # Fall through to SetMBB
8069 MBB = LoopMBB;
8070 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8071 .addReg(OrigOldVal).addMBB(StartMBB)
8072 .addReg(RetryOldVal).addMBB(SetMBB);
8073 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
8074 .addReg(OrigSwapVal).addMBB(StartMBB)
8075 .addReg(RetrySwapVal).addMBB(SetMBB);
8076 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
8077 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
8078 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
8079 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
8080 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
8081 .addReg(OldValRot);
8082 BuildMI(MBB, DL, TII->get(SystemZ::CR))
8083 .addReg(Dest).addReg(CmpVal);
8084 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8085 .addImm(SystemZ::CCMASK_ICMP)
8086 .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
8087 MBB->addSuccessor(DoneMBB);
8088 MBB->addSuccessor(SetMBB);
8089
8090 // SetMBB:
8091 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
8092 // ^^ Rotate the new field to its proper position.
8093 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
8094 // JNE LoopMBB
8095 // # fall through to ExitMBB
8096 MBB = SetMBB;
8097 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
8098 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
8099 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
8100 .addReg(OldVal)
8101 .addReg(StoreVal)
8102 .add(Base)
8103 .addImm(Disp);
8104 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8105 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
8106 MBB->addSuccessor(LoopMBB);
8107 MBB->addSuccessor(DoneMBB);
8108
8109 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
8110 // to the block after the loop. At this point, CC may have been defined
8111 // either by the CR in LoopMBB or by the CS in SetMBB.
8112 if (!MI.registerDefIsDead(SystemZ::CC))
8113 DoneMBB->addLiveIn(SystemZ::CC);
8114
8115 MI.eraseFromParent();
8116 return DoneMBB;
8117}
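
Note: the block structure above implements a sub-word compare-and-swap by rotating the field of interest into the low BitSize bits (RLL), splicing the swap value into the loaded word (RISBG32), and retrying the word-sized CS until it succeeds. As a rough sketch of the same idea in portable C++ (not the LLVM lowering itself; the name cmpSwapByte, the byte-aligned shift parameter, and the use of std::atomic are assumptions of this illustration), an 8-bit CAS built on a 32-bit CAS could look like:

#include <atomic>
#include <cstdint>

// Sketch only: emulate an 8-bit compare-and-swap with a 32-bit CAS, mirroring
// the rotate / insert / retry structure of emitAtomicCmpSwapW. 'word' is the
// aligned 32-bit word containing the byte; 'shift' is the byte's bit position.
bool cmpSwapByte(std::atomic<uint32_t> &word, unsigned shift,
                 uint8_t cmpVal, uint8_t swapVal) {
  uint32_t old = word.load();
  for (;;) {
    uint8_t cur = uint8_t(old >> shift);                  // field moved to the low bits
    if (cur != cmpVal)
      return false;                                       // like the JNE to DoneMBB
    uint32_t desired = (old & ~(uint32_t(0xff) << shift)) // splice in the new field,
                       | (uint32_t(swapVal) << shift);    // as RISBG32 does
    if (word.compare_exchange_weak(old, desired))         // the CS instruction
      return true;
    // On failure 'old' is refreshed; loop back, like the branch to LoopMBB.
  }
}

The retry edge mirrors SetMBB branching back to LoopMBB: when the containing word changed underneath, the freshly observed value feeds the next comparison.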
8118
8119// Emit a move from two GR64s to a GR128.
8120MachineBasicBlock *
8121SystemZTargetLowering::emitPair128(MachineInstr &MI,
8122 MachineBasicBlock *MBB) const {
8123 MachineFunction &MF = *MBB->getParent();
8124 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8125 MachineRegisterInfo &MRI = MF.getRegInfo();
8126 DebugLoc DL = MI.getDebugLoc();
8127
8128 Register Dest = MI.getOperand(0).getReg();
8129 Register Hi = MI.getOperand(1).getReg();
8130 Register Lo = MI.getOperand(2).getReg();
8131 Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8132 Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8133
8134 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1);
8135 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
8136 .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64);
8137 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8138 .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64);
8139
8140 MI.eraseFromParent();
8141 return MBB;
8142}
8143
8144// Emit an extension from a GR64 to a GR128. ClearEven is true
8145// if the high register of the GR128 value must be cleared or false if
8146// it's "don't care".
8147MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
8148 MachineBasicBlock *MBB,
8149 bool ClearEven) const {
8150 MachineFunction &MF = *MBB->getParent();
8151 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8152 MachineRegisterInfo &MRI = MF.getRegInfo();
8153 DebugLoc DL = MI.getDebugLoc();
8154
8155 Register Dest = MI.getOperand(0).getReg();
8156 Register Src = MI.getOperand(1).getReg();
8157 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8158
8159 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
8160 if (ClearEven) {
8161 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8162 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8163
8164 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
8165 .addImm(0);
8166 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
8167 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
8168 In128 = NewIn128;
8169 }
8170 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8171 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
8172
8173 MI.eraseFromParent();
8174 return MBB;
8175}
8176
8177MachineBasicBlock *
8178SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
8179 MachineBasicBlock *MBB,
8180 unsigned Opcode, bool IsMemset) const {
8181 MachineFunction &MF = *MBB->getParent();
8182 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8183 MachineRegisterInfo &MRI = MF.getRegInfo();
8184 DebugLoc DL = MI.getDebugLoc();
8185
8186 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
8187 uint64_t DestDisp = MI.getOperand(1).getImm();
8188 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
8189 uint64_t SrcDisp;
8190
8191 // Fold the displacement Disp if it is out of range.
8192 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
8193 if (!isUInt<12>(Disp)) {
8194 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8195 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
8196 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
8197 .add(Base).addImm(Disp).addReg(0);
8198 Base = MachineOperand::CreateReg(Reg, false);
8199 Disp = 0;
8200 }
8201 };
8202
8203 if (!IsMemset) {
8204 SrcBase = earlyUseOperand(MI.getOperand(2));
8205 SrcDisp = MI.getOperand(3).getImm();
8206 } else {
8207 SrcBase = DestBase;
8208 SrcDisp = DestDisp++;
8209 foldDisplIfNeeded(DestBase, DestDisp);
8210 }
8211
8212 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
8213 bool IsImmForm = LengthMO.isImm();
8214 bool IsRegForm = !IsImmForm;
8215
8216 // Build and insert one Opcode of Length, with special treatment for memset.
8217 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
8218 MachineBasicBlock::iterator InsPos,
8219 MachineOperand DBase, uint64_t DDisp,
8220 MachineOperand SBase, uint64_t SDisp,
8221 unsigned Length) -> void {
8222    assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
8223 if (IsMemset) {
8224 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
8225 if (ByteMO.isImm())
8226 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
8227 .add(SBase).addImm(SDisp).add(ByteMO);
8228 else
8229 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
8230 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
8231 if (--Length == 0)
8232 return;
8233 }
8234 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
8235 .add(DBase).addImm(DDisp).addImm(Length)
8236 .add(SBase).addImm(SDisp)
8237 .setMemRefs(MI.memoperands());
8238 };
8239
8240 bool NeedsLoop = false;
8241 uint64_t ImmLength = 0;
8242 Register LenAdjReg = SystemZ::NoRegister;
8243 if (IsImmForm) {
8244 ImmLength = LengthMO.getImm();
8245 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
8246 if (ImmLength == 0) {
8247 MI.eraseFromParent();
8248 return MBB;
8249 }
8250 if (Opcode == SystemZ::CLC) {
8251 if (ImmLength > 3 * 256)
8252 // A two-CLC sequence is a clear win over a loop, not least because
8253 // it needs only one branch. A three-CLC sequence needs the same
8254 // number of branches as a loop (i.e. 2), but is shorter. That
8255 // brings us to lengths greater than 768 bytes. It seems relatively
8256 // likely that a difference will be found within the first 768 bytes,
8257 // so we just optimize for the smallest number of branch
8258 // instructions, in order to avoid polluting the prediction buffer
8259 // too much.
8260 NeedsLoop = true;
8261 } else if (ImmLength > 6 * 256)
8262 // The heuristic we use is to prefer loops for anything that would
8263 // require 7 or more MVCs. With these kinds of sizes there isn't much
8264 // to choose between straight-line code and looping code, since the
8265 // time will be dominated by the MVCs themselves.
8266 NeedsLoop = true;
8267 } else {
8268 NeedsLoop = true;
8269 LenAdjReg = LengthMO.getReg();
8270 }
8271
8272 // When generating more than one CLC, all but the last will need to
8273 // branch to the end when a difference is found.
8274 MachineBasicBlock *EndMBB =
8275 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
8276 ? SystemZ::splitBlockAfter(MI, MBB)
8277 : nullptr);
8278
8279 if (NeedsLoop) {
8280 Register StartCountReg =
8281 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8282 if (IsImmForm) {
8283 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
8284 ImmLength &= 255;
8285 } else {
8286 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
8287 .addReg(LenAdjReg)
8288 .addReg(0)
8289 .addImm(8);
8290 }
8291
8292 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
8293 auto loadZeroAddress = [&]() -> MachineOperand {
8294 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8295 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
8296 return MachineOperand::CreateReg(Reg, false);
8297 };
8298 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
8299 DestBase = loadZeroAddress();
8300 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
8301 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
8302
8303 MachineBasicBlock *StartMBB = nullptr;
8304 MachineBasicBlock *LoopMBB = nullptr;
8305 MachineBasicBlock *NextMBB = nullptr;
8306 MachineBasicBlock *DoneMBB = nullptr;
8307 MachineBasicBlock *AllDoneMBB = nullptr;
8308
8309 Register StartSrcReg = forceReg(MI, SrcBase, TII);
8310 Register StartDestReg =
8311 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
8312
8313 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
8314 Register ThisSrcReg = MRI.createVirtualRegister(RC);
8315 Register ThisDestReg =
8316 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
8317 Register NextSrcReg = MRI.createVirtualRegister(RC);
8318 Register NextDestReg =
8319 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
8320 RC = &SystemZ::GR64BitRegClass;
8321 Register ThisCountReg = MRI.createVirtualRegister(RC);
8322 Register NextCountReg = MRI.createVirtualRegister(RC);
8323
8324 if (IsRegForm) {
8325 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8326 StartMBB = SystemZ::emitBlockAfter(MBB);
8327 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8328 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
8329 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
8330
8331 // MBB:
8332 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
8333 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8334 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
8335 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8336 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
8337 .addMBB(AllDoneMBB);
8338 MBB->addSuccessor(AllDoneMBB);
8339 if (!IsMemset)
8340 MBB->addSuccessor(StartMBB);
8341 else {
8342 // MemsetOneCheckMBB:
8343 // # Jump to MemsetOneMBB for a memset of length 1, or
8344 // # fall thru to StartMBB.
8345 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
8346 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
8347 MBB->addSuccessor(MemsetOneCheckMBB);
8348 MBB = MemsetOneCheckMBB;
8349 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8350 .addReg(LenAdjReg).addImm(-1);
8351 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8352 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
8353 .addMBB(MemsetOneMBB);
8354 MBB->addSuccessor(MemsetOneMBB, {10, 100});
8355 MBB->addSuccessor(StartMBB, {90, 100});
8356
8357 // MemsetOneMBB:
8358 // # Jump back to AllDoneMBB after a single MVI or STC.
8359 MBB = MemsetOneMBB;
8360 insertMemMemOp(MBB, MBB->end(),
8361 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
8362 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
8363 1);
8364 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
8365 MBB->addSuccessor(AllDoneMBB);
8366 }
8367
8368 // StartMBB:
8369 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
8370 MBB = StartMBB;
8371 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8372 .addReg(StartCountReg).addImm(0);
8373 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8374 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
8375 .addMBB(DoneMBB);
8376 MBB->addSuccessor(DoneMBB);
8377 MBB->addSuccessor(LoopMBB);
8378 }
8379 else {
8380 StartMBB = MBB;
8381 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8382 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8383 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
8384
8385 // StartMBB:
8386 // # fall through to LoopMBB
8387 MBB->addSuccessor(LoopMBB);
8388
8389 DestBase = MachineOperand::CreateReg(NextDestReg, false);
8390 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
8391 if (EndMBB && !ImmLength)
8392 // If the loop handled the whole CLC range, DoneMBB will be empty with
8393 // CC live-through into EndMBB, so add it as live-in.
8394 DoneMBB->addLiveIn(SystemZ::CC);
8395 }
8396
8397 // LoopMBB:
8398 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
8399 // [ %NextDestReg, NextMBB ]
8400 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
8401 // [ %NextSrcReg, NextMBB ]
8402 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
8403 // [ %NextCountReg, NextMBB ]
8404 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
8405 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
8406 // ( JLH EndMBB )
8407 //
8408 // The prefetch is used only for MVC. The JLH is used only for CLC.
8409 MBB = LoopMBB;
8410 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
8411 .addReg(StartDestReg).addMBB(StartMBB)
8412 .addReg(NextDestReg).addMBB(NextMBB);
8413 if (!HaveSingleBase)
8414 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
8415 .addReg(StartSrcReg).addMBB(StartMBB)
8416 .addReg(NextSrcReg).addMBB(NextMBB);
8417 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
8418 .addReg(StartCountReg).addMBB(StartMBB)
8419 .addReg(NextCountReg).addMBB(NextMBB);
8420 if (Opcode == SystemZ::MVC)
8421 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
8422 .addImm(SystemZ::PFD_WRITE)
8423 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
8424 insertMemMemOp(MBB, MBB->end(),
8425 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
8426 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
8427 if (EndMBB) {
8428 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8429 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
8430 .addMBB(EndMBB);
8431 MBB->addSuccessor(EndMBB);
8432 MBB->addSuccessor(NextMBB);
8433 }
8434
8435 // NextMBB:
8436 // %NextDestReg = LA 256(%ThisDestReg)
8437 // %NextSrcReg = LA 256(%ThisSrcReg)
8438 // %NextCountReg = AGHI %ThisCountReg, -1
8439 // CGHI %NextCountReg, 0
8440 // JLH LoopMBB
8441 // # fall through to DoneMBB
8442 //
8443 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
8444 MBB = NextMBB;
8445 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
8446 .addReg(ThisDestReg).addImm(256).addReg(0);
8447 if (!HaveSingleBase)
8448 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
8449 .addReg(ThisSrcReg).addImm(256).addReg(0);
8450 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
8451 .addReg(ThisCountReg).addImm(-1);
8452 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8453 .addReg(NextCountReg).addImm(0);
8454 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8455 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
8456 .addMBB(LoopMBB);
8457 MBB->addSuccessor(LoopMBB);
8458 MBB->addSuccessor(DoneMBB);
8459
8460 MBB = DoneMBB;
8461 if (IsRegForm) {
8462 // DoneMBB:
8463 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
8464 // # Use EXecute Relative Long for the remainder of the bytes. The target
8465 // instruction of the EXRL will have a length field of 1 since 0 is an
8466 // illegal value. The number of bytes processed becomes (%LenAdjReg &
8467 // 0xff) + 1.
8468 // # Fall through to AllDoneMBB.
8469 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8470 Register RemDestReg = HaveSingleBase ? RemSrcReg
8471 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8472 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
8473 .addReg(StartDestReg).addMBB(StartMBB)
8474 .addReg(NextDestReg).addMBB(NextMBB);
8475 if (!HaveSingleBase)
8476 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
8477 .addReg(StartSrcReg).addMBB(StartMBB)
8478 .addReg(NextSrcReg).addMBB(NextMBB);
8479 if (IsMemset)
8480 insertMemMemOp(MBB, MBB->end(),
8481 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
8482 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
8483 MachineInstrBuilder EXRL_MIB =
8484 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
8485 .addImm(Opcode)
8486 .addReg(LenAdjReg)
8487 .addReg(RemDestReg).addImm(DestDisp)
8488 .addReg(RemSrcReg).addImm(SrcDisp);
8489 MBB->addSuccessor(AllDoneMBB);
8490 MBB = AllDoneMBB;
8491 if (EndMBB) {
8492 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
8493 MBB->addLiveIn(SystemZ::CC);
8494 }
8495 }
8496 }
8497
8498 // Handle any remaining bytes with straight-line code.
8499 while (ImmLength > 0) {
8500 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
8501 // The previous iteration might have created out-of-range displacements.
8502 // Apply them using LA/LAY if so.
8503 foldDisplIfNeeded(DestBase, DestDisp);
8504 foldDisplIfNeeded(SrcBase, SrcDisp);
8505 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
8506 DestDisp += ThisLength;
8507 SrcDisp += ThisLength;
8508 ImmLength -= ThisLength;
8509 // If there's another CLC to go, branch to the end if a difference
8510 // was found.
8511 if (EndMBB && ImmLength > 0) {
8512 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
8513 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8514 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
8515 .addMBB(EndMBB);
8516 MBB->addSuccessor(EndMBB);
8517 MBB->addSuccessor(NextMBB);
8518 MBB = NextMBB;
8519 }
8520 }
8521 if (EndMBB) {
8522 MBB->addSuccessor(EndMBB);
8523 MBB = EndMBB;
8524 MBB->addLiveIn(SystemZ::CC);
8525 }
8526
8527 MI.eraseFromParent();
8528 return MBB;
8529}
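
Note: for an immediate length, the code above splits the operation into 256-byte chunks and chooses between straight-line chunks and a loop using the thresholds from the comments (more than 3*256 bytes for CLC, more than 6*256 bytes otherwise), with the loop count and remainder taken from ImmLength / 256 and ImmLength & 255. A minimal sketch of just that arithmetic; MemMemPlan and planMemMem are illustrative names, not LLVM APIs:

#include <cstdint>

// Illustration of the chunking decision emitMemMemWrapper makes for an
// immediate-length MVC/CLC-style operation.
struct MemMemPlan {
  bool needsLoop;      // emit the 256-bytes-per-iteration loop
  uint64_t loopCount;  // loop iterations (length / 256)
  uint64_t remainder;  // bytes handled by straight-line code afterwards
};

inline MemMemPlan planMemMem(uint64_t length, bool isCLC) {
  MemMemPlan plan{false, 0, length};
  uint64_t limit = isCLC ? 3 * 256 : 6 * 256;   // thresholds from the comments above
  if (length > limit) {
    plan.needsLoop = true;
    plan.loopCount = length / 256;              // matches loading ImmLength / 256
    plan.remainder = length % 256;              // matches ImmLength &= 255
  }
  return plan;
}
// e.g. planMemMem(1000, /*isCLC=*/true) -> loop of 3 iterations plus a 232-byte tail.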
8530
8531// Decompose string pseudo-instruction MI into a loop that continually performs
8532// Opcode until CC != 3.
8533MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
8534 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
8535 MachineFunction &MF = *MBB->getParent();
8536 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8537 MachineRegisterInfo &MRI = MF.getRegInfo();
8538 DebugLoc DL = MI.getDebugLoc();
8539
8540 uint64_t End1Reg = MI.getOperand(0).getReg();
8541 uint64_t Start1Reg = MI.getOperand(1).getReg();
8542 uint64_t Start2Reg = MI.getOperand(2).getReg();
8543 uint64_t CharReg = MI.getOperand(3).getReg();
8544
8545 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
8546 uint64_t This1Reg = MRI.createVirtualRegister(RC);
8547 uint64_t This2Reg = MRI.createVirtualRegister(RC);
8548 uint64_t End2Reg = MRI.createVirtualRegister(RC);
8549
8550 MachineBasicBlock *StartMBB = MBB;
8551 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8552 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8553
8554 // StartMBB:
8555 // # fall through to LoopMBB
8556 MBB->addSuccessor(LoopMBB);
8557
8558 // LoopMBB:
8559 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
8560 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
8561 // R0L = %CharReg
8562 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
8563 // JO LoopMBB
8564 // # fall through to DoneMBB
8565 //
8566 // The load of R0L can be hoisted by post-RA LICM.
8567 MBB = LoopMBB;
8568
8569 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
8570 .addReg(Start1Reg).addMBB(StartMBB)
8571 .addReg(End1Reg).addMBB(LoopMBB);
8572 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
8573 .addReg(Start2Reg).addMBB(StartMBB)
8574 .addReg(End2Reg).addMBB(LoopMBB);
8575 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
8576 BuildMI(MBB, DL, TII->get(Opcode))
8577 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
8578 .addReg(This1Reg).addReg(This2Reg);
8579 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8580 .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
8581 MBB->addSuccessor(LoopMBB);
8582 MBB->addSuccessor(DoneMBB);
8583
8584 DoneMBB->addLiveIn(SystemZ::CC);
8585
8586 MI.eraseFromParent();
8587 return DoneMBB;
8588}
8589
8590// Update TBEGIN instruction with final opcode and register clobbers.
8591MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
8592 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
8593 bool NoFloat) const {
8594 MachineFunction &MF = *MBB->getParent();
8595 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
8596 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8597
8598 // Update opcode.
8599 MI.setDesc(TII->get(Opcode));
8600
8601 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
8602 // Make sure to add the corresponding GRSM bits if they are missing.
8603 uint64_t Control = MI.getOperand(2).getImm();
8604 static const unsigned GPRControlBit[16] = {
8605 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
8606 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
8607 };
8608 Control |= GPRControlBit[15];
8609 if (TFI->hasFP(MF))
8610 Control |= GPRControlBit[11];
8611 MI.getOperand(2).setImm(Control);
8612
8613 // Add GPR clobbers.
8614 for (int I = 0; I < 16; I++) {
8615 if ((Control & GPRControlBit[I]) == 0) {
8616 unsigned Reg = SystemZMC::GR64Regs[I];
8617 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
8618 }
8619 }
8620
8621 // Add FPR/VR clobbers.
8622 if (!NoFloat && (Control & 4) != 0) {
8623 if (Subtarget.hasVector()) {
8624 for (unsigned Reg : SystemZMC::VR128Regs) {
8625 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
8626 }
8627 } else {
8628 for (unsigned Reg : SystemZMC::FP64Regs) {
8629 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
8630 }
8631 }
8632 }
8633
8634 return MBB;
8635}
8636
8637MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
8638 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
8639 MachineFunction &MF = *MBB->getParent();
8640 MachineRegisterInfo *MRI = &MF.getRegInfo();
8641 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8642 DebugLoc DL = MI.getDebugLoc();
8643
8644 Register SrcReg = MI.getOperand(0).getReg();
8645
8646 // Create new virtual register of the same class as source.
8647 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
8648 Register DstReg = MRI->createVirtualRegister(RC);
8649
8650 // Replace pseudo with a normal load-and-test that models the def as
8651 // well.
8652 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
8653 .addReg(SrcReg)
8654 .setMIFlags(MI.getFlags());
8655 MI.eraseFromParent();
8656
8657 return MBB;
8658}
8659
8660MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
8661 MachineInstr &MI, MachineBasicBlock *MBB) const {
8662 MachineFunction &MF = *MBB->getParent();
8663 MachineRegisterInfo *MRI = &MF.getRegInfo();
8664 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8665 DebugLoc DL = MI.getDebugLoc();
8666 const unsigned ProbeSize = getStackProbeSize(MF);
8667 Register DstReg = MI.getOperand(0).getReg();
8668 Register SizeReg = MI.getOperand(2).getReg();
8669
8670 MachineBasicBlock *StartMBB = MBB;
8671 MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
8672 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
8673 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
8674 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
8675 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
8676
8677 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
8678 MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
8679
8680 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8681 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8682
8683 // LoopTestMBB
8684 // BRC TailTestMBB
8685 // # fallthrough to LoopBodyMBB
8686 StartMBB->addSuccessor(LoopTestMBB);
8687 MBB = LoopTestMBB;
8688 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
8689 .addReg(SizeReg)
8690 .addMBB(StartMBB)
8691 .addReg(IncReg)
8692 .addMBB(LoopBodyMBB);
8693 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
8694 .addReg(PHIReg)
8695 .addImm(ProbeSize);
8696 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8697 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
8698 .addMBB(TailTestMBB);
8699 MBB->addSuccessor(LoopBodyMBB);
8700 MBB->addSuccessor(TailTestMBB);
8701
8702 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
8703 // J LoopTestMBB
8704 MBB = LoopBodyMBB;
8705 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
8706 .addReg(PHIReg)
8707 .addImm(ProbeSize);
8708 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
8709 .addReg(SystemZ::R15D)
8710 .addImm(ProbeSize);
8711 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
8712 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
8713 .setMemRefs(VolLdMMO);
8714 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
8715 MBB->addSuccessor(LoopTestMBB);
8716
8717 // TailTestMBB
8718 // BRC DoneMBB
8719 // # fallthrough to TailMBB
8720 MBB = TailTestMBB;
8721 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8722 .addReg(PHIReg)
8723 .addImm(0);
8724 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8725 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
8726 .addMBB(DoneMBB);
8727 MBB->addSuccessor(TailMBB);
8728 MBB->addSuccessor(DoneMBB);
8729
8730 // TailMBB
8731 // # fallthrough to DoneMBB
8732 MBB = TailMBB;
8733 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
8734 .addReg(SystemZ::R15D)
8735 .addReg(PHIReg);
8736 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
8737 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
8738 .setMemRefs(VolLdMMO);
8739 MBB->addSuccessor(DoneMBB);
8740
8741 // DoneMBB
8742 MBB = DoneMBB;
8743 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
8744 .addReg(SystemZ::R15D);
8745
8746 MI.eraseFromParent();
8747 return DoneMBB;
8748}
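
Note: the expansion above walks the stack down in ProbeSize steps, issuing a volatile compare (CG) just below each previous stack pointer so that every guard page is touched before the allocation is used, then handles the sub-ProbeSize tail the same way. A conceptual sketch of that control flow; probedAlloca and touch are illustrative names, not the actual lowering:

#include <cstdint>

// Conceptual model of the loop emitProbedAlloca builds; 'touch' stands in for
// the volatile CG probe.
inline void probedAlloca(uintptr_t &sp, uint64_t size, uint64_t probeSize,
                         void (*touch)(uintptr_t)) {
  uint64_t remaining = size;
  while (remaining >= probeSize) {     // LoopTestMBB: CLGFI %PHIReg, ProbeSize
    remaining -= probeSize;            // LoopBodyMBB: SLGFI IncReg, ProbeSize
    sp -= probeSize;                   //              SLGFI R15D, ProbeSize
    touch(sp + probeSize - 8);         //              probe 8 bytes below the old SP
  }
  if (remaining != 0) {                // TailTestMBB: CGHI %PHIReg, 0
    sp -= remaining;                   // TailMBB: SLGR R15D, PHIReg
    touch(sp + remaining - 8);         //          probe 8 bytes below the old SP
  }
}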
8749
8750SDValue SystemZTargetLowering::
8751getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
8752 MachineFunction &MF = DAG.getMachineFunction();
8753 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
8754 SDLoc DL(SP);
8755 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
8756 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
8757}
8758
8759MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
8760 MachineInstr &MI, MachineBasicBlock *MBB) const {
8761 switch (MI.getOpcode()) {
8762 case SystemZ::Select32:
8763 case SystemZ::Select64:
8764 case SystemZ::SelectF32:
8765 case SystemZ::SelectF64:
8766 case SystemZ::SelectF128:
8767 case SystemZ::SelectVR32:
8768 case SystemZ::SelectVR64:
8769 case SystemZ::SelectVR128:
8770 return emitSelect(MI, MBB);
8771
8772 case SystemZ::CondStore8Mux:
8773 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
8774 case SystemZ::CondStore8MuxInv:
8775 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
8776 case SystemZ::CondStore16Mux:
8777 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
8778 case SystemZ::CondStore16MuxInv:
8779 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
8780 case SystemZ::CondStore32Mux:
8781 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
8782 case SystemZ::CondStore32MuxInv:
8783 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
8784 case SystemZ::CondStore8:
8785 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
8786 case SystemZ::CondStore8Inv:
8787 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
8788 case SystemZ::CondStore16:
8789 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
8790 case SystemZ::CondStore16Inv:
8791 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
8792 case SystemZ::CondStore32:
8793 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
8794 case SystemZ::CondStore32Inv:
8795 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
8796 case SystemZ::CondStore64:
8797 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
8798 case SystemZ::CondStore64Inv:
8799 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
8800 case SystemZ::CondStoreF32:
8801 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
8802 case SystemZ::CondStoreF32Inv:
8803 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
8804 case SystemZ::CondStoreF64:
8805 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
8806 case SystemZ::CondStoreF64Inv:
8807 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
8808
8809 case SystemZ::PAIR128:
8810 return emitPair128(MI, MBB);
8811 case SystemZ::AEXT128:
8812 return emitExt128(MI, MBB, false);
8813 case SystemZ::ZEXT128:
8814 return emitExt128(MI, MBB, true);
8815
8816 case SystemZ::ATOMIC_SWAPW:
8817 return emitAtomicLoadBinary(MI, MBB, 0, 0);
8818 case SystemZ::ATOMIC_SWAP_32:
8819 return emitAtomicLoadBinary(MI, MBB, 0, 32);
8820 case SystemZ::ATOMIC_SWAP_64:
8821 return emitAtomicLoadBinary(MI, MBB, 0, 64);
8822
8823 case SystemZ::ATOMIC_LOADW_AR:
8824 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0);
8825 case SystemZ::ATOMIC_LOADW_AFI:
8826 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0);
8827 case SystemZ::ATOMIC_LOAD_AR:
8828 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32);
8829 case SystemZ::ATOMIC_LOAD_AHI:
8830 return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32);
8831 case SystemZ::ATOMIC_LOAD_AFI:
8832 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32);
8833 case SystemZ::ATOMIC_LOAD_AGR:
8834 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64);
8835 case SystemZ::ATOMIC_LOAD_AGHI:
8836 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64);
8837 case SystemZ::ATOMIC_LOAD_AGFI:
8838 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64);
8839
8840 case SystemZ::ATOMIC_LOADW_SR:
8841 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
8842 case SystemZ::ATOMIC_LOAD_SR:
8843 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
8844 case SystemZ::ATOMIC_LOAD_SGR:
8845 return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);
8846
8847 case SystemZ::ATOMIC_LOADW_NR:
8848 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
8849 case SystemZ::ATOMIC_LOADW_NILH:
8850 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
8851 case SystemZ::ATOMIC_LOAD_NR:
8852 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
8853 case SystemZ::ATOMIC_LOAD_NILL:
8854 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
8855 case SystemZ::ATOMIC_LOAD_NILH:
8856 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
8857 case SystemZ::ATOMIC_LOAD_NILF:
8858 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
8859 case SystemZ::ATOMIC_LOAD_NGR:
8860 return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
8861 case SystemZ::ATOMIC_LOAD_NILL64:
8862 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
8863 case SystemZ::ATOMIC_LOAD_NILH64:
8864 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
8865 case SystemZ::ATOMIC_LOAD_NIHL64:
8866 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
8867 case SystemZ::ATOMIC_LOAD_NIHH64:
8868 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
8869 case SystemZ::ATOMIC_LOAD_NILF64:
8870 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
8871 case SystemZ::ATOMIC_LOAD_NIHF64:
8872 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);
8873
8874 case SystemZ::ATOMIC_LOADW_OR:
8875 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
8876 case SystemZ::ATOMIC_LOADW_OILH:
8877 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
8878 case SystemZ::ATOMIC_LOAD_OR:
8879 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
8880 case SystemZ::ATOMIC_LOAD_OILL:
8881 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
8882 case SystemZ::ATOMIC_LOAD_OILH:
8883 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
8884 case SystemZ::ATOMIC_LOAD_OILF:
8885 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
8886 case SystemZ::ATOMIC_LOAD_OGR:
8887 return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
8888 case SystemZ::ATOMIC_LOAD_OILL64:
8889 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
8890 case SystemZ::ATOMIC_LOAD_OILH64:
8891 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
8892 case SystemZ::ATOMIC_LOAD_OIHL64:
8893 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
8894 case SystemZ::ATOMIC_LOAD_OIHH64:
8895 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
8896 case SystemZ::ATOMIC_LOAD_OILF64:
8897 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
8898 case SystemZ::ATOMIC_LOAD_OIHF64:
8899 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);
8900
8901 case SystemZ::ATOMIC_LOADW_XR:
8902 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
8903 case SystemZ::ATOMIC_LOADW_XILF:
8904 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
8905 case SystemZ::ATOMIC_LOAD_XR:
8906 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
8907 case SystemZ::ATOMIC_LOAD_XILF:
8908 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
8909 case SystemZ::ATOMIC_LOAD_XGR:
8910 return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
8911 case SystemZ::ATOMIC_LOAD_XILF64:
8912 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
8913 case SystemZ::ATOMIC_LOAD_XIHF64:
8914 return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);
8915
8916 case SystemZ::ATOMIC_LOADW_NRi:
8917 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
8918 case SystemZ::ATOMIC_LOADW_NILHi:
8919 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
8920 case SystemZ::ATOMIC_LOAD_NRi:
8921 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
8922 case SystemZ::ATOMIC_LOAD_NILLi:
8923 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
8924 case SystemZ::ATOMIC_LOAD_NILHi:
8925 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
8926 case SystemZ::ATOMIC_LOAD_NILFi:
8927 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
8928 case SystemZ::ATOMIC_LOAD_NGRi:
8929 return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
8930 case SystemZ::ATOMIC_LOAD_NILL64i:
8931 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
8932 case SystemZ::ATOMIC_LOAD_NILH64i:
8933 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
8934 case SystemZ::ATOMIC_LOAD_NIHL64i:
8935 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
8936 case SystemZ::ATOMIC_LOAD_NIHH64i:
8937 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
8938 case SystemZ::ATOMIC_LOAD_NILF64i:
8939 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
8940 case SystemZ::ATOMIC_LOAD_NIHF64i:
8941 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);
8942
8943 case SystemZ::ATOMIC_LOADW_MIN:
8944 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
8945 SystemZ::CCMASK_CMP_LE, 0);
8946 case SystemZ::ATOMIC_LOAD_MIN_32:
8947 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
8948 SystemZ::CCMASK_CMP_LE, 32);
8949 case SystemZ::ATOMIC_LOAD_MIN_64:
8950 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
8951 SystemZ::CCMASK_CMP_LE, 64);
8952
8953 case SystemZ::ATOMIC_LOADW_MAX:
8954 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
8955 SystemZ::CCMASK_CMP_GE, 0);
8956 case SystemZ::ATOMIC_LOAD_MAX_32:
8957 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
8958 SystemZ::CCMASK_CMP_GE, 32);
8959 case SystemZ::ATOMIC_LOAD_MAX_64:
8960 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
8961 SystemZ::CCMASK_CMP_GE, 64);
8962
8963 case SystemZ::ATOMIC_LOADW_UMIN:
8964 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
8965 SystemZ::CCMASK_CMP_LE, 0);
8966 case SystemZ::ATOMIC_LOAD_UMIN_32:
8967 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
8968 SystemZ::CCMASK_CMP_LE, 32);
8969 case SystemZ::ATOMIC_LOAD_UMIN_64:
8970 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
8971 SystemZ::CCMASK_CMP_LE, 64);
8972
8973 case SystemZ::ATOMIC_LOADW_UMAX:
8974 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
8975 SystemZ::CCMASK_CMP_GE, 0);
8976 case SystemZ::ATOMIC_LOAD_UMAX_32:
8977 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
8978 SystemZ::CCMASK_CMP_GE, 32);
8979 case SystemZ::ATOMIC_LOAD_UMAX_64:
8980 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
8981 SystemZ::CCMASK_CMP_GE, 64);
8982
8983 case SystemZ::ATOMIC_CMP_SWAPW:
8984 return emitAtomicCmpSwapW(MI, MBB);
8985 case SystemZ::MVCImm:
8986 case SystemZ::MVCReg:
8987 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
8988 case SystemZ::NCImm:
8989 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
8990 case SystemZ::OCImm:
8991 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
8992 case SystemZ::XCImm:
8993 case SystemZ::XCReg:
8994 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
8995 case SystemZ::CLCImm:
8996 case SystemZ::CLCReg:
8997 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
8998 case SystemZ::MemsetImmImm:
8999 case SystemZ::MemsetImmReg:
9000 case SystemZ::MemsetRegImm:
9001 case SystemZ::MemsetRegReg:
9002 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
9003 case SystemZ::CLSTLoop:
9004 return emitStringWrapper(MI, MBB, SystemZ::CLST);
9005 case SystemZ::MVSTLoop:
9006 return emitStringWrapper(MI, MBB, SystemZ::MVST);
9007 case SystemZ::SRSTLoop:
9008 return emitStringWrapper(MI, MBB, SystemZ::SRST);
9009 case SystemZ::TBEGIN:
9010 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
9011 case SystemZ::TBEGIN_nofloat:
9012 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
9013 case SystemZ::TBEGINC:
9014 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
9015 case SystemZ::LTEBRCompare_VecPseudo:
9016 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
9017 case SystemZ::LTDBRCompare_VecPseudo:
9018 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
9019 case SystemZ::LTXBRCompare_VecPseudo:
9020 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
9021
9022 case SystemZ::PROBED_ALLOCA:
9023 return emitProbedAlloca(MI, MBB);
9024
9025 case TargetOpcode::STACKMAP:
9026 case TargetOpcode::PATCHPOINT:
9027 return emitPatchPoint(MI, MBB);
9028
9029 default:
9030    llvm_unreachable("Unexpected instr type to insert");
9031 }
9032}
9033
9034// This is only used by the isel schedulers, and is needed only to prevent
9035// the compiler from crashing when list-ilp is used.
9036const TargetRegisterClass *
9037SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
9038 if (VT == MVT::Untyped)
9039 return &SystemZ::ADDR128BitRegClass;
9040 return TargetLowering::getRepRegClassFor(VT);
9041}
9042
9043SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
9044 SelectionDAG &DAG) const {
9045 SDLoc dl(Op);
9046 /*
9047 The rounding method is in FPC Byte 3 bits 6-7, and has the following
9048 settings:
9049 00 Round to nearest
9050 01 Round to 0
9051 10 Round to +inf
9052 11 Round to -inf
9053
9054 FLT_ROUNDS, on the other hand, expects the following:
9055 -1 Undefined
9056 0 Round to 0
9057 1 Round to nearest
9058 2 Round to +inf
9059 3 Round to -inf
9060 */
9061
9062 // Save FPC to register.
9063 SDValue Chain = Op.getOperand(0);
9064 SDValue EFPC(
9065 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
9066 Chain = EFPC.getValue(1);
9067
9068 // Transform as necessary
9069 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
9070 DAG.getConstant(3, dl, MVT::i32));
9071 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
9072 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
9073 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
9074 DAG.getConstant(1, dl, MVT::i32)));
9075
9076 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
9077 DAG.getConstant(1, dl, MVT::i32));
9078 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
9079
9080 return DAG.getMergeValues({RetVal, Chain}, dl);
9081}
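
Note: the AND, SRL and two XORs above are a table-free conversion from the two FPC rounding-mode bits to the FLT_ROUNDS encoding listed in the comment. A small check of that mapping; fpcToFltRounds is an illustrative standalone helper, not DAG code:

#include <cassert>
#include <cstdint>

// FPC rounding bits {00, 01, 10, 11} = {nearest, toward 0, +inf, -inf}
// map to FLT_ROUNDS values {1, 0, 2, 3} via (rm ^ (rm >> 1)) ^ 1.
inline unsigned fpcToFltRounds(uint32_t fpc) {
  uint32_t rm = fpc & 3;
  return (rm ^ (rm >> 1)) ^ 1;
}

inline void checkRoundingMap() {
  assert(fpcToFltRounds(0) == 1); // round to nearest
  assert(fpcToFltRounds(1) == 0); // round toward zero
  assert(fpcToFltRounds(2) == 2); // round toward +inf
  assert(fpcToFltRounds(3) == 3); // round toward -inf
}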

/build/source/llvm/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15
16#include "llvm/ADT/bit.h"
17#include "llvm/Support/Compiler.h"
18#include <cassert>
19#include <climits>
20#include <cstdint>
21#include <cstring>
22#include <limits>
23#include <type_traits>
24
25namespace llvm {
26
27/// The behavior an operation has on an input of 0.
28enum ZeroBehavior {
29 /// The returned value is undefined.
30 ZB_Undefined,
31 /// The returned value is numeric_limits<T>::max()
32 ZB_Max
33};
34
35/// Mathematical constants.
36namespace numbers {
37// TODO: Track C++20 std::numbers.
38// TODO: Favor using the hexadecimal FP constants (requires C++17).
39constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113
40 egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
41 ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
42 ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392
43 log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0)
44 log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2)
45 pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796
46 inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541
47 sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
48 inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
49 sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A002193
50 inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1)
51 sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
52 inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1)
53 phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622
54constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113
55 egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620
56 ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162
57 ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392
58 log2ef = 1.44269504F, // (0x1.715476P+0)
59 log10ef = .434294482F, // (0x1.bcb7b2P-2)
60 pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796
61 inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541
62 sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161
63 inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197
64 sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193
65 inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1)
66 sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194
67 inv_sqrt3f = .577350269F, // (0x1.279a74P-1)
68 phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
69} // namespace numbers
70
71/// Count number of 0's from the least significant bit to the most
72/// stopping at the first 1.
73///
74/// Only unsigned integral types are allowed.
75///
76/// Returns std::numeric_limits<T>::digits on an input of 0.
77template <typename T> unsigned countTrailingZeros(T Val) {
78 static_assert(std::is_unsigned_v<T>,
79 "Only unsigned integral types are allowed.");
80 return llvm::countr_zero(Val);
81}
82
83/// Count number of 0's from the most significant bit to the least
84/// stopping at the first 1.
85///
86/// Only unsigned integral types are allowed.
87///
88/// Returns std::numeric_limits<T>::digits on an input of 0.
89template <typename T> unsigned countLeadingZeros(T Val) {
90 static_assert(std::is_unsigned_v<T>,
91 "Only unsigned integral types are allowed.");
92 return llvm::countl_zero(Val);
93}
94
95/// Get the index of the first set bit starting from the least
96/// significant bit.
97///
98/// Only unsigned integral types are allowed.
99///
100/// \param ZB the behavior on an input of 0.
101template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
102  if (ZB == ZB_Max && Val == 0)
       [12.1] 'ZB' is equal to ZB_Max
       [13] Assuming 'Val' is equal to 0
       [14] Taking true branch
103    return std::numeric_limits<T>::max();
       [15] Calling 'numeric_limits::max'
       [17] Returning from 'numeric_limits::max'
       [18] Returning the value 18446744073709551615
104
105 return llvm::countr_zero(Val);
106}
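
Note: this is the function the report's path runs through. With the default ZB_Max behavior and a zero input it returns std::numeric_limits<uint64_t>::max(), and using that result as a shift count back at SystemZISelLowering.cpp line 785 shifts a uint64_t by 18446744073709551615, which is undefined. A caller-side guard illustrating the hazard; lowestSetBitOrZero is an illustrative name, not the SystemZ code:

#include <cstdint>
#include "llvm/Support/MathExtras.h"

// Feeding findFirstSet's ZB_Max result straight into a shift is undefined
// behavior whenever the input was 0; shifts of 64 or more must be guarded.
inline uint64_t lowestSetBitOrZero(uint64_t mask) {
  uint64_t idx = llvm::findFirstSet(mask);   // 2^64 - 1 when mask == 0
  if (idx >= 64)                             // guard before shifting
    return 0;
  return uint64_t(1) << idx;                 // well-defined now
}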
107
108/// Create a bitmask with the N right-most bits set to 1, and all other
109/// bits set to 0. Only unsigned types are allowed.
110template <typename T> T maskTrailingOnes(unsigned N) {
111 static_assert(std::is_unsigned<T>::value, "Invalid type!");
112  const unsigned Bits = CHAR_BIT * sizeof(T);
113  assert(N <= Bits && "Invalid bit index");
114 return N == 0 ? 0 : (T(-1) >> (Bits - N));
115}
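
Note: the N == 0 special case exists because T(-1) >> Bits would shift by the full width of the type, which is undefined; with N > 0 the shift amount Bits - N always stays in range. A few usage checks, assuming llvm/Support/MathExtras.h is available:

#include <cassert>
#include <cstdint>
#include "llvm/Support/MathExtras.h"

// Both extremes are well-defined: N == 0 is handled by the ternary, and for
// N > 0 the shift amount never reaches the type width.
inline void checkMasks() {
  assert(llvm::maskTrailingOnes<uint8_t>(3) == 0x07);
  assert(llvm::maskTrailingOnes<uint8_t>(0) == 0x00);           // the ternary's case
  assert(llvm::maskTrailingOnes<uint64_t>(64) == UINT64_MAX);   // shift by 0, defined
}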
116
117/// Create a bitmask with the N left-most bits set to 1, and all other
118/// bits set to 0. Only unsigned types are allowed.
119template <typename T> T maskLeadingOnes(unsigned N) {
120  return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
121}
122
123/// Create a bitmask with the N right-most bits set to 0, and all other
124/// bits set to 1. Only unsigned types are allowed.
125template <typename T> T maskTrailingZeros(unsigned N) {
126  return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
127}
128
129/// Create a bitmask with the N left-most bits set to 0, and all other
130/// bits set to 1. Only unsigned types are allowed.
131template <typename T> T maskLeadingZeros(unsigned N) {
132  return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
133}
134
135/// Get the index of the last set bit starting from the least
136/// significant bit.
137///
138/// Only unsigned integral types are allowed.
139///
140/// \param ZB the behavior on an input of 0.
141template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
142 if (ZB == ZB_Max && Val == 0)
143 return std::numeric_limits<T>::max();
144
145 // Use ^ instead of - because both gcc and llvm can remove the associated ^
146 // in the __builtin_clz intrinsic on x86.
147 return llvm::countl_zero(Val) ^ (std::numeric_limits<T>::digits - 1);
148}
149
150/// Macro compressed bit reversal table for 256 bits.
151///
152/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
153static const unsigned char BitReverseTable256[256] = {
154#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
155#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
156#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
157 R6(0), R6(2), R6(1), R6(3)
158#undef R2
159#undef R4
160#undef R6
161};
162
163/// Reverse the bits in \p Val.
164template <typename T> T reverseBits(T Val) {
165#if __has_builtin(__builtin_bitreverse8)
166  if constexpr (std::is_same_v<T, uint8_t>)
167    return __builtin_bitreverse8(Val);
168#endif
169#if __has_builtin(__builtin_bitreverse16)
170  if constexpr (std::is_same_v<T, uint16_t>)
171    return __builtin_bitreverse16(Val);
172#endif
173#if __has_builtin(__builtin_bitreverse32)
174  if constexpr (std::is_same_v<T, uint32_t>)
175    return __builtin_bitreverse32(Val);
176#endif
177#if __has_builtin(__builtin_bitreverse64)
178  if constexpr (std::is_same_v<T, uint64_t>)
179    return __builtin_bitreverse64(Val);
180#endif
181
182 unsigned char in[sizeof(Val)];
183 unsigned char out[sizeof(Val)];
184 std::memcpy(in, &Val, sizeof(Val));
185 for (unsigned i = 0; i < sizeof(Val); ++i)
186 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
187 std::memcpy(&Val, out, sizeof(Val));
188 return Val;
189}
190
191// NOTE: The following support functions use the _32/_64 extensions instead of
192// type overloading so that signed and unsigned integers can be used without
193// ambiguity.
194
195/// Return the high 32 bits of a 64 bit value.
196constexpr inline uint32_t Hi_32(uint64_t Value) {
197 return static_cast<uint32_t>(Value >> 32);
198}
199
200/// Return the low 32 bits of a 64 bit value.
201constexpr inline uint32_t Lo_32(uint64_t Value) {
202 return static_cast<uint32_t>(Value);
203}
204
205/// Make a 64-bit integer from a high / low pair of 32-bit integers.
206constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
207 return ((uint64_t)High << 32) | (uint64_t)Low;
208}
209
210/// Checks if an integer fits into the given bit width.
211template <unsigned N> constexpr inline bool isInt(int64_t x) {
212 if constexpr (N == 8)
213 return static_cast<int8_t>(x) == x;
214 if constexpr (N == 16)
215 return static_cast<int16_t>(x) == x;
216 if constexpr (N == 32)
217 return static_cast<int32_t>(x) == x;
218 if constexpr (N < 64)
219    return -(INT64_C(1) << (N - 1)) <= x && x < (INT64_C(1) << (N - 1));
220 (void)x; // MSVC v19.25 warns that x is unused.
221 return true;
222}
223
224/// Checks if a signed integer is an N bit number shifted left by S.
225template <unsigned N, unsigned S>
226constexpr inline bool isShiftedInt(int64_t x) {
227 static_assert(
228 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number.");
229 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
230  return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
231}
232
233/// Checks if an unsigned integer fits into the given bit width.
234template <unsigned N> constexpr inline bool isUInt(uint64_t x) {
235 static_assert(N > 0, "isUInt<0> doesn't make sense");
236 if constexpr (N == 8)
237 return static_cast<uint8_t>(x) == x;
238 if constexpr (N == 16)
239 return static_cast<uint16_t>(x) == x;
240 if constexpr (N == 32)
241 return static_cast<uint32_t>(x) == x;
242 if constexpr (N < 64)
243    return x < (UINT64_C(1) << (N));
244 (void)x; // MSVC v19.25 warns that x is unused.
245 return true;
246}
247
248/// Checks if an unsigned integer is an N bit number shifted left by S.
249template <unsigned N, unsigned S>
250constexpr inline bool isShiftedUInt(uint64_t x) {
251 static_assert(
252 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
253 static_assert(N + S <= 64,
254 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
255 // Per the two static_asserts above, S must be strictly less than 64. So
256 // 1 << S is not undefined behavior.
257  return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
258}
259
260/// Gets the maximum value for a N-bit unsigned integer.
261inline uint64_t maxUIntN(uint64_t N) {
262  assert(N > 0 && N <= 64 && "integer width out of range");
263
264 // uint64_t(1) << 64 is undefined behavior, so we can't do
265 // (uint64_t(1) << N) - 1
266 // without checking first that N != 64. But this works and doesn't have a
267 // branch.
268  return UINT64_MAX >> (64 - N);
269}
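
Note: the branch-free form works because 64 - N is always in [0, 63] under the assert, so the right shift stays defined even for N == 64, where (uint64_t(1) << N) - 1 would not be. A brief check, assuming llvm/Support/MathExtras.h is available:

#include <cassert>
#include <cstdint>
#include "llvm/Support/MathExtras.h"

// maxUIntN is defined across its whole range, including the N == 64 case that
// the naive (1 << N) - 1 formulation cannot express.
inline void checkMaxUIntN() {
  assert(llvm::maxUIntN(1) == 1);
  assert(llvm::maxUIntN(8) == 255);
  assert(llvm::maxUIntN(64) == UINT64_MAX);
}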
270
271/// Gets the minimum value for a N-bit signed integer.
272inline int64_t minIntN(int64_t N) {
273  assert(N > 0 && N <= 64 && "integer width out of range");
274
275  return UINT64_C(1) + ~(UINT64_C(1) << (N - 1));
276}
277
278/// Gets the maximum value for a N-bit signed integer.
279inline int64_t maxIntN(int64_t N) {
280  assert(N > 0 && N <= 64 && "integer width out of range");
281
282 // This relies on two's complement wraparound when N == 64, so we convert to
283 // int64_t only at the very end to avoid UB.
284  return (UINT64_C(1) << (N - 1)) - 1;
285}
286
287/// Checks if an unsigned integer fits into the given (dynamic) bit width.
288inline bool isUIntN(unsigned N, uint64_t x) {
289 return N >= 64 || x <= maxUIntN(N);
290}
291
292/// Checks if an signed integer fits into the given (dynamic) bit width.
293inline bool isIntN(unsigned N, int64_t x) {
294 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
295}
296
297/// Return true if the argument is a non-empty sequence of ones starting at the
298/// least significant bit with the remainder zero (32 bit version).
299/// Ex. isMask_32(0x0000FFFFU) == true.
300constexpr inline bool isMask_32(uint32_t Value) {
301 return Value && ((Value + 1) & Value) == 0;
302}
303
304/// Return true if the argument is a non-empty sequence of ones starting at the
305/// least significant bit with the remainder zero (64 bit version).
306constexpr inline bool isMask_64(uint64_t Value) {
307 return Value && ((Value + 1) & Value) == 0;
308}
309
310/// Return true if the argument contains a non-empty sequence of ones with the
311/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
312constexpr inline bool isShiftedMask_32(uint32_t Value) {
313 return Value && isMask_32((Value - 1) | Value);
314}
315
316/// Return true if the argument contains a non-empty sequence of ones with the
317/// remainder zero (64 bit version.)
318constexpr inline bool isShiftedMask_64(uint64_t Value) {
319 return Value && isMask_64((Value - 1) | Value);
320}
321
322/// Return true if the argument is a power of two > 0.
323/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
324constexpr inline bool isPowerOf2_32(uint32_t Value) {
325 return llvm::has_single_bit(Value);
326}
327
328/// Return true if the argument is a power of two > 0 (64 bit edition.)
329constexpr inline bool isPowerOf2_64(uint64_t Value) {
330 return llvm::has_single_bit(Value);
331}
332
333/// Count the number of ones from the most significant bit to the first
334/// zero bit.
335///
336/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
337/// Only unsigned integral types are allowed.
338///
339/// Returns std::numeric_limits<T>::digits on an input of all ones.
340template <typename T> unsigned countLeadingOnes(T Value) {
341 static_assert(std::is_unsigned_v<T>,
342 "Only unsigned integral types are allowed.");
343 return llvm::countl_one<T>(Value);
344}
345
346/// Count the number of ones from the least significant bit to the first
347/// zero bit.
348///
349/// Ex. countTrailingOnes(0x00FF00FF) == 8.
350/// Only unsigned integral types are allowed.
351///
352/// Returns std::numeric_limits<T>::digits on an input of all ones.
353template <typename T> unsigned countTrailingOnes(T Value) {
354 static_assert(std::is_unsigned_v<T>,
355 "Only unsigned integral types are allowed.");
356 return llvm::countr_one<T>(Value);
357}
358
359/// Count the number of set bits in a value.
360/// Ex. countPopulation(0xF000F000) = 8
361/// Returns 0 if the word is zero.
362template <typename T>
363inline unsigned countPopulation(T Value) {
364 static_assert(std::is_unsigned_v<T>,
365 "Only unsigned integral types are allowed.");
366 return (unsigned)llvm::popcount(Value);
367}
368
369/// Return true if the argument contains a non-empty sequence of ones with the
370/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
371/// If true, \p MaskIdx will specify the index of the lowest set bit and \p
372/// MaskLen is updated to specify the length of the mask; otherwise neither is
373/// updated.
374inline bool isShiftedMask_32(uint32_t Value, unsigned &MaskIdx,
375 unsigned &MaskLen) {
376 if (!isShiftedMask_32(Value))
377 return false;
378 MaskIdx = llvm::countr_zero(Value);
379 MaskLen = llvm::popcount(Value);
380 return true;
381}
382
383/// Return true if the argument contains a non-empty sequence of ones with the
384/// remainder zero (64 bit version.) If true, \p MaskIdx will specify the index
385/// of the lowest set bit and \p MaskLen is updated to specify the length of the
386/// mask; otherwise neither is updated.
387inline bool isShiftedMask_64(uint64_t Value, unsigned &MaskIdx,
388 unsigned &MaskLen) {
389 if (!isShiftedMask_64(Value))
390 return false;
391 MaskIdx = llvm::countr_zero(Value);
392 MaskLen = llvm::popcount(Value);
393 return true;
394}
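A self-contained sketch of the decomposition above (the name decomposeShiftedMask is illustrative; it uses the GCC/Clang builtins available to the report's toolchain instead of the LLVM wrappers): a shifted mask is fully described by the index of its lowest set bit and its population count.

#include <cstdint>

inline bool decomposeShiftedMask(std::uint64_t Value, unsigned &MaskIdx,
                                 unsigned &MaskLen) {
  if (Value == 0)
    return false;
  // Filling the zeros below the run reduces the test to the low-mask check.
  std::uint64_t LowFilled = (Value - 1) | Value;
  if (((LowFilled + 1) & LowFilled) != 0)
    return false;                                 // more than one run of ones
  MaskIdx = __builtin_ctzll(Value);               // index of the lowest set bit
  MaskLen = __builtin_popcountll(Value);          // length of the run
  return true;
}
// For Value == 0x0000FF00 this yields MaskIdx == 8 and MaskLen == 8.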
395
396/// Compile time Log2.
397/// Valid only for positive powers of two.
398template <size_t kValue> constexpr inline size_t CTLog2() {
399 static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue),
400 "Value is not a valid power of 2");
401 return 1 + CTLog2<kValue / 2>();
402}
403
404template <> constexpr inline size_t CTLog2<1>() { return 0; }
405
406/// Return the floor log base 2 of the specified value, -1 if the value is zero.
407/// (32 bit edition.)
408/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
409inline unsigned Log2_32(uint32_t Value) {
410 return 31 - llvm::countl_zero(Value);
411}
412
413/// Return the floor log base 2 of the specified value, -1 if the value is zero.
414/// (64 bit edition.)
415inline unsigned Log2_64(uint64_t Value) {
416 return 63 - llvm::countl_zero(Value);
417}
418
419/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
420/// (32 bit edition).
421/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
422inline unsigned Log2_32_Ceil(uint32_t Value) {
423 return 32 - llvm::countl_zero(Value - 1);
424}
425
426/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
427/// (64 bit edition.)
428inline unsigned Log2_64_Ceil(uint64_t Value) {
429 return 64 - llvm::countl_zero(Value - 1);
430}
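A portable sketch of the floor/ceil pair above (loop-based, illustrative names; unlike the helpers above it simply maps 0 to 0 rather than to the bit width): the ceiling for V > 1 is the floor of V - 1 plus one.

#include <cstdint>

constexpr unsigned log2FloorSketch(std::uint64_t V) {
  unsigned R = 0;
  while (V >>= 1)
    ++R;
  return R;
}
constexpr unsigned log2CeilSketch(std::uint64_t V) {
  return V <= 1 ? 0 : log2FloorSketch(V - 1) + 1;
}
static_assert(log2FloorSketch(32) == 5 && log2CeilSketch(32) == 5, "exact power of two");
static_assert(log2FloorSketch(6) == 2 && log2CeilSketch(6) == 3, "non-power rounds up");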
431
432/// This function takes a 64-bit integer and returns the bit equivalent double.
433inline double BitsToDouble(uint64_t Bits) {
434 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
435 return llvm::bit_cast<double>(Bits);
436}
437
438/// This function takes a 32-bit integer and returns the bit equivalent float.
439inline float BitsToFloat(uint32_t Bits) {
440 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
441 return llvm::bit_cast<float>(Bits);
442}
443
444/// This function takes a double and returns the bit equivalent 64-bit integer.
445/// Note that copying doubles around changes the bits of NaNs on some hosts,
446/// notably x86, so this routine cannot be used if these bits are needed.
447inline uint64_t DoubleToBits(double Double) {
448 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
449 return llvm::bit_cast<uint64_t>(Double);
450}
451
452/// This function takes a float and returns the bit equivalent 32-bit integer.
453/// Note that copying floats around changes the bits of NaNs on some hosts,
454/// notably x86, so this routine cannot be used if these bits are needed.
455inline uint32_t FloatToBits(float Float) {
456 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
457 return llvm::bit_cast<uint32_t>(Float);
458}
459
460/// A and B are either alignments or offsets. Return the minimum alignment that
461/// may be assumed after adding the two together.
462constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
463 // The largest power of 2 that divides both A and B.
464 //
465 // Replace "-Value" by "1+~Value" in the following commented code to avoid
466 // MSVC warning C4146
467 // return (A | B) & -(A | B);
468 return (A | B) & (1 + ~(A | B));
469}
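The expression above is the classic lowest-set-bit isolation, sketched standalone here (illustrative name): x & (1 + ~x) is x & -x without the MSVC warning, and the lowest set bit of A | B is the largest power of two dividing both A and B.

#include <cstdint>

constexpr std::uint64_t minAlignSketch(std::uint64_t A, std::uint64_t B) {
  return (A | B) & (1 + ~(A | B));
}
static_assert(minAlignSketch(8, 12) == 4, "largest power of two dividing both");
static_assert(minAlignSketch(16, 16) == 16, "equal alignments are preserved");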
470
471/// Returns the next power of two (in 64-bits) that is strictly greater than A.
472/// Returns zero on overflow.
473constexpr inline uint64_t NextPowerOf2(uint64_t A) {
474 A |= (A >> 1);
475 A |= (A >> 2);
476 A |= (A >> 4);
477 A |= (A >> 8);
478 A |= (A >> 16);
479 A |= (A >> 32);
480 return A + 1;
481}
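A standalone restatement of the bit-smearing above (illustrative name): each OR copies the highest set bit into progressively lower positions, turning A into 2^k - 1 for the smallest 2^k greater than the original A, so adding 1 yields that power of two (and 0 on wraparound, as documented).

#include <cstdint>

constexpr std::uint64_t nextPowerOf2Sketch(std::uint64_t A) {
  A |= (A >> 1); A |= (A >> 2); A |= (A >> 4);
  A |= (A >> 8); A |= (A >> 16); A |= (A >> 32);
  return A + 1;
}
static_assert(nextPowerOf2Sketch(5) == 8, "strictly greater power of two");
static_assert(nextPowerOf2Sketch(8) == 16, "strict even for exact powers");
static_assert(nextPowerOf2Sketch(0) == 1, "zero maps to one");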
482
483/// Returns the power of two which is less than or equal to the given value.
484/// Essentially, it is a floor operation across the domain of powers of two.
485inline uint64_t PowerOf2Floor(uint64_t A) {
486 return llvm::bit_floor(A);
487}
488
489/// Returns the power of two which is greater than or equal to the given value.
490/// Essentially, it is a ceil operation across the domain of powers of two.
491inline uint64_t PowerOf2Ceil(uint64_t A) {
492 if (!A)
493 return 0;
494 return NextPowerOf2(A - 1);
495}
496
497/// Returns the next integer (mod 2**64) that is greater than or equal to
498/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
499///
500/// Examples:
501/// \code
502/// alignTo(5, 8) = 8
503/// alignTo(17, 8) = 24
504/// alignTo(~0LL, 8) = 0
505/// alignTo(321, 255) = 510
506/// \endcode
507inline uint64_t alignTo(uint64_t Value, uint64_t Align) {
508 assert(Align != 0u && "Align can't be 0.")(static_cast <bool> (Align != 0u && "Align can't be 0."
) ? void (0) : __assert_fail ("Align != 0u && \"Align can't be 0.\""
, "llvm/include/llvm/Support/MathExtras.h", 508, __extension__
__PRETTY_FUNCTION__))
;
509 return (Value + Align - 1) / Align * Align;
510}
511
512inline uint64_t alignToPowerOf2(uint64_t Value, uint64_t Align) {
513 assert(Align != 0 && (Align & (Align - 1)) == 0 &&(static_cast <bool> (Align != 0 && (Align &
(Align - 1)) == 0 && "Align must be a power of 2") ?
void (0) : __assert_fail ("Align != 0 && (Align & (Align - 1)) == 0 && \"Align must be a power of 2\""
, "llvm/include/llvm/Support/MathExtras.h", 514, __extension__
__PRETTY_FUNCTION__))
514 "Align must be a power of 2")(static_cast <bool> (Align != 0 && (Align &
(Align - 1)) == 0 && "Align must be a power of 2") ?
void (0) : __assert_fail ("Align != 0 && (Align & (Align - 1)) == 0 && \"Align must be a power of 2\""
, "llvm/include/llvm/Support/MathExtras.h", 514, __extension__
__PRETTY_FUNCTION__))
;
515 return (Value + Align - 1) & -Align;
516}
517
518/// If non-zero \p Skew is specified, the return value will be a minimal integer
519/// that is greater than or equal to \p Value and equal to \p Align * N + \p Skew for
520/// some integer N. If \p Skew is larger than \p Align, its value is adjusted to '\p
521/// Skew mod \p Align'. \p Align must be non-zero.
522///
523/// Examples:
524/// \code
525/// alignTo(5, 8, 7) = 7
526/// alignTo(17, 8, 1) = 17
527/// alignTo(~0LL, 8, 3) = 3
528/// alignTo(321, 255, 42) = 552
529/// \endcode
530inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew) {
531 assert(Align != 0u && "Align can't be 0.")(static_cast <bool> (Align != 0u && "Align can't be 0."
) ? void (0) : __assert_fail ("Align != 0u && \"Align can't be 0.\""
, "llvm/include/llvm/Support/MathExtras.h", 531, __extension__
__PRETTY_FUNCTION__))
;
532 Skew %= Align;
533 return alignTo(Value - Skew, Align) + Skew;
534}
535
536/// Returns the next integer (mod 2**64) that is greater than or equal to
537/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
538template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
539 static_assert(Align != 0u, "Align must be non-zero");
540 return (Value + Align - 1) / Align * Align;
541}
542
543/// Returns the integer ceil(Numerator / Denominator).
544inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
545 return alignTo(Numerator, Denominator) / Denominator;
546}
547
548/// Returns the integer nearest(Numerator / Denominator).
549inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
550 return (Numerator + (Denominator / 2)) / Denominator;
551}
552
553/// Returns the largest uint64_t less than or equal to \p Value that is congruent
554/// to \p Skew modulo \p Align. \p Align must be non-zero.
555inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
556 assert(Align != 0u && "Align can't be 0.")(static_cast <bool> (Align != 0u && "Align can't be 0."
) ? void (0) : __assert_fail ("Align != 0u && \"Align can't be 0.\""
, "llvm/include/llvm/Support/MathExtras.h", 556, __extension__
__PRETTY_FUNCTION__))
;
557 Skew %= Align;
558 return (Value - Skew) / Align * Align + Skew;
559}
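A paired sketch of alignTo / alignDown with a skew (illustrative names, same arithmetic): values are rounded up or down to the nearest point of the lattice { Align * N + Skew }.

#include <cstdint>

constexpr std::uint64_t alignToSketch(std::uint64_t Value, std::uint64_t Align,
                                      std::uint64_t Skew = 0) {
  Skew %= Align;
  return (Value - Skew + Align - 1) / Align * Align + Skew;
}
constexpr std::uint64_t alignDownSketch(std::uint64_t Value, std::uint64_t Align,
                                        std::uint64_t Skew = 0) {
  Skew %= Align;
  return (Value - Skew) / Align * Align + Skew;
}
static_assert(alignToSketch(17, 8) == 24 && alignDownSketch(17, 8) == 16, "no skew");
static_assert(alignToSketch(17, 8, 1) == 17 && alignDownSketch(18, 8, 1) == 17, "skewed lattice");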
560
561/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
562/// Requires 0 < B <= 32.
563template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
564 static_assert(B > 0, "Bit width can't be 0.");
565 static_assert(B <= 32, "Bit width out of range.");
566 return int32_t(X << (32 - B)) >> (32 - B);
567}
568
569/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
570/// Requires 0 < B <= 32.
571inline int32_t SignExtend32(uint32_t X, unsigned B) {
572 assert(B > 0 && "Bit width can't be 0.")(static_cast <bool> (B > 0 && "Bit width can't be 0."
) ? void (0) : __assert_fail ("B > 0 && \"Bit width can't be 0.\""
, "llvm/include/llvm/Support/MathExtras.h", 572, __extension__
__PRETTY_FUNCTION__))
;
573 assert(B <= 32 && "Bit width out of range.")(static_cast <bool> (B <= 32 && "Bit width out of range."
) ? void (0) : __assert_fail ("B <= 32 && \"Bit width out of range.\""
, "llvm/include/llvm/Support/MathExtras.h", 573, __extension__
__PRETTY_FUNCTION__))
;
574 return int32_t(X << (32 - B)) >> (32 - B);
575}
576
577/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
578/// Requires 0 < B <= 64.
579template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
580 static_assert(B > 0, "Bit width can't be 0.");
581 static_assert(B <= 64, "Bit width out of range.");
582 return int64_t(x << (64 - B)) >> (64 - B);
583}
584
585/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
586/// Requires 0 < B <= 64.
587inline int64_t SignExtend64(uint64_t X, unsigned B) {
588 assert(B > 0 && "Bit width can't be 0.")(static_cast <bool> (B > 0 && "Bit width can't be 0."
) ? void (0) : __assert_fail ("B > 0 && \"Bit width can't be 0.\""
, "llvm/include/llvm/Support/MathExtras.h", 588, __extension__
__PRETTY_FUNCTION__))
;
589 assert(B <= 64 && "Bit width out of range.")(static_cast <bool> (B <= 64 && "Bit width out of range."
) ? void (0) : __assert_fail ("B <= 64 && \"Bit width out of range.\""
, "llvm/include/llvm/Support/MathExtras.h", 589, __extension__
__PRETTY_FUNCTION__))
;
590 return int64_t(X << (64 - B)) >> (64 - B);
591}
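A worked sketch of the shift-based sign extension above (illustrative name): the low B bits are moved to the top of the word and shifted back so bit B - 1 is replicated through the high bits, relying, as the header does, on the implementation's arithmetic right shift of signed values.

#include <cstdint>

constexpr std::int64_t signExtend64Sketch(std::uint64_t X, unsigned B) {
  return std::int64_t(X << (64 - B)) >> (64 - B);
}
static_assert(signExtend64Sketch(0xFF, 8) == -1, "all-ones byte extends to -1");
static_assert(signExtend64Sketch(0x7F, 8) == 127, "clear sign bit stays positive");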
592
593/// Subtract two unsigned integers, X and Y, of type T and return the absolute
594/// value of the result.
595template <typename T>
596std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) {
597 return X > Y ? (X - Y) : (Y - X);
598}
599
600/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
601/// maximum representable value of T on overflow. ResultOverflowed indicates if
602/// the result is larger than the maximum representable value of type T.
603template <typename T>
604std::enable_if_t<std::is_unsigned<T>::value, T>
605SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
606 bool Dummy;
607 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
608 // Hacker's Delight, p. 29
609 T Z = X + Y;
610 Overflowed = (Z < X || Z < Y);
611 if (Overflowed)
612 return std::numeric_limits<T>::max();
613 else
614 return Z;
615}
616
617/// Add multiple unsigned integers of type T. Clamp the result to the
618/// maximum representable value of T on overflow.
619template <class T, class... Ts>
620std::enable_if_t<std::is_unsigned_v<T>, T> SaturatingAdd(T X, T Y, T Z,
621 Ts... Args) {
622 bool Overflowed = false;
623 T XY = SaturatingAdd(X, Y, &Overflowed);
624 if (Overflowed)
625 return SaturatingAdd(std::numeric_limits<T>::max(), T(1), Args...);
626 return SaturatingAdd(XY, Z, Args...);
627}
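A compact standalone version of the two-operand case above (illustrative name): with unsigned arithmetic the sum wraps, and it wrapped exactly when the result went backwards past one of the operands, so overflow can be detected after the fact and clamped.

#include <limits>

template <typename T>
T saturatingAddSketch(T X, T Y, bool *Overflowed = nullptr) {
  T Z = X + Y;              // unsigned wraparound is well defined
  bool Wrapped = Z < X;     // true exactly when the addition overflowed
  if (Overflowed)
    *Overflowed = Wrapped;
  return Wrapped ? std::numeric_limits<T>::max() : Z;
}
// saturatingAddSketch<unsigned char>(200, 100) returns 255 and reports overflow.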
628
629/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
630/// maximum representable value of T on overflow. ResultOverflowed indicates if
631/// the result is larger than the maximum representable value of type T.
632template <typename T>
633std::enable_if_t<std::is_unsigned<T>::value, T>
634SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
635 bool Dummy;
636 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
637
638 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
639 // because it fails for uint16_t (where multiplication can have undefined
640 // behavior due to promotion to int), and requires a division in addition
641 // to the multiplication.
642
643 Overflowed = false;
644
645 // Log2(Z) would be either Log2Z or Log2Z + 1.
646 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
647 // will necessarily be less than Log2Max as desired.
648 int Log2Z = Log2_64(X) + Log2_64(Y);
649 const T Max = std::numeric_limits<T>::max();
650 int Log2Max = Log2_64(Max);
651 if (Log2Z < Log2Max) {
652 return X * Y;
653 }
654 if (Log2Z > Log2Max) {
655 Overflowed = true;
656 return Max;
657 }
658
659 // We're going to use the top bit, and maybe overflow one
660 // bit past it. Multiply all but the bottom bit then add
661 // that on at the end.
662 T Z = (X >> 1) * Y;
663 if (Z & ~(Max >> 1)) {
664 Overflowed = true;
665 return Max;
666 }
667 Z <<= 1;
668 if (X & 1)
669 return SaturatingAdd(Z, Y, ResultOverflowed);
670
671 return Z;
672}
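A standalone check of the log2 pre-screen used above (illustrative name, loop-based floor log2): if the floor-log2s of the operands already sum past the floor-log2 of the maximum, the product must overflow; if they fall short, it cannot; only the boundary case needs the halved multiply performed by the function above.

#include <cstdint>
#include <limits>

// 0 = cannot overflow, 1 = must overflow, 2 = boundary case needing more work.
inline int mulOverflowPrescreen(std::uint64_t X, std::uint64_t Y) {
  auto floorLog2 = [](std::uint64_t V) {
    int R = -1;
    while (V) { V >>= 1; ++R; }   // -1 for V == 0, matching the comment above
    return R;
  };
  const int Log2Z = floorLog2(X) + floorLog2(Y);
  const int Log2Max = floorLog2(std::numeric_limits<std::uint64_t>::max()); // 63
  if (Log2Z < Log2Max) return 0;
  if (Log2Z > Log2Max) return 1;
  return 2;
}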
673
674/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
675/// the product. Clamp the result to the maximum representable value of T on
676/// overflow. ResultOverflowed indicates if the result is larger than the
677/// maximum representable value of type T.
678template <typename T>
679std::enable_if_t<std::is_unsigned<T>::value, T>
680SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
681 bool Dummy;
682 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
683
684 T Product = SaturatingMultiply(X, Y, &Overflowed);
685 if (Overflowed)
686 return Product;
687
688 return SaturatingAdd(A, Product, &Overflowed);
689}
690
691/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
692extern const float huge_valf;
693
694
695/// Add two signed integers, computing the two's complement truncated result,
696/// returning true if overflow occurred.
697template <typename T>
698std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) {
699#if __has_builtin(__builtin_add_overflow)1
700 return __builtin_add_overflow(X, Y, &Result);
701#else
702 // Perform the unsigned addition.
703 using U = std::make_unsigned_t<T>;
704 const U UX = static_cast<U>(X);
705 const U UY = static_cast<U>(Y);
706 const U UResult = UX + UY;
707
708 // Convert to signed.
709 Result = static_cast<T>(UResult);
710
711 // Adding two positive numbers should result in a positive number.
712 if (X > 0 && Y > 0)
713 return Result <= 0;
714 // Adding two negatives should result in a negative number.
715 if (X < 0 && Y < 0)
716 return Result >= 0;
717 return false;
718#endif
719}
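A usage sketch of the checked addition above, assuming the __builtin_add_overflow branch that the report's compiler takes (the wrapper name is illustrative):

#include <cstdint>

inline bool addWouldOverflow(std::int64_t X, std::int64_t Y, std::int64_t &Result) {
  // Returns true on overflow; Result always receives the wrapped value.
  return __builtin_add_overflow(X, Y, &Result);
}
// addWouldOverflow(INT64_MAX, 1, R) returns true and leaves INT64_MIN in R.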
720
721/// Subtract two signed integers, computing the two's complement truncated
722/// result, returning true if an overflow occurred.
723template <typename T>
724std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) {
725#if __has_builtin(__builtin_sub_overflow)1
726 return __builtin_sub_overflow(X, Y, &Result);
727#else
728 // Perform the unsigned subtraction.
729 using U = std::make_unsigned_t<T>;
730 const U UX = static_cast<U>(X);
731 const U UY = static_cast<U>(Y);
732 const U UResult = UX - UY;
733
734 // Convert to signed.
735 Result = static_cast<T>(UResult);
736
737 // Subtracting a positive number from a negative results in a negative number.
738 if (X <= 0 && Y > 0)
739 return Result >= 0;
740 // Subtracting a negative number from a positive results in a positive number.
741 if (X >= 0 && Y < 0)
742 return Result <= 0;
743 return false;
744#endif
745}
746
747/// Multiply two signed integers, computing the two's complement truncated
748/// result, returning true if an overflow occurred.
749template <typename T>
750std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) {
751 // Perform the unsigned multiplication on absolute values.
752 using U = std::make_unsigned_t<T>;
753 const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
754 const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
755 const U UResult = UX * UY;
756
757 // Convert to signed.
758 const bool IsNegative = (X < 0) ^ (Y < 0);
759 Result = IsNegative ? (0 - UResult) : UResult;
760
761 // If any of the args was 0, result is 0 and no overflow occurs.
762 if (UX == 0 || UY == 0)
763 return false;
764
765 // UX and UY are in [1, 2^n], where n is the number of digits.
766 // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
767 // positive) divided by an argument compares to the other.
768 if (IsNegative)
769 return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY;
770 else
771 return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY;
772}
773
774} // End llvm namespace
775
776#endif

/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/limits

1// The template and inlines for the numeric_limits classes. -*- C++ -*-
2
3// Copyright (C) 1999-2020 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/** @file include/limits
26 * This is a Standard C++ Library header.
27 */
28
29// Note: this is not a conforming implementation.
30// Written by Gabriel Dos Reis <gdr@codesourcery.com>
31
32//
33// ISO 14882:1998
34// 18.2.1
35//
36
37#ifndef _GLIBCXX_NUMERIC_LIMITS1
38#define _GLIBCXX_NUMERIC_LIMITS1 1
39
40#pragma GCC system_header
41
42#include <bits/c++config.h>
43
44//
45// The numeric_limits<> traits document implementation-defined aspects
46// of fundamental arithmetic data types (integers and floating points).
47// From Standard C++ point of view, there are 14 such types:
48// * integers
49// bool (1)
50// char, signed char, unsigned char, wchar_t (4)
51// short, unsigned short (2)
52// int, unsigned (2)
53// long, unsigned long (2)
54//
55// * floating points
56// float (1)
57// double (1)
58// long double (1)
59//
60// GNU C++ understands (where supported by the host C-library)
61// * integer
62// long long, unsigned long long (2)
63//
64// which brings us to 16 fundamental arithmetic data types in GNU C++.
65//
66//
67// Since a numeric_limits<> is a bit tricky to get right, we rely on
68// an interface composed of macros which should be defined in config/os
69// or config/cpu when they differ from the generic (read arbitrary)
70// definitions given here.
71//
72
73// These values can be overridden in the target configuration file.
74// The default values are appropriate for many 32-bit targets.
75
76// GCC only intrinsically supports modulo integral types. The only remaining
77// integral exceptional value is division by zero. Only targets that do not
78// signal division by zero in some "hard to ignore" way should use false.
79#ifndef __glibcxx_integral_trapstrue
80# define __glibcxx_integral_trapstrue true
81#endif
82
83// float
84//
85
86// Default values. Should be overridden in configuration files if necessary.
87
88#ifndef __glibcxx_float_has_denorm_loss
89# define __glibcxx_float_has_denorm_loss false
90#endif
91#ifndef __glibcxx_float_traps
92# define __glibcxx_float_traps false
93#endif
94#ifndef __glibcxx_float_tinyness_before
95# define __glibcxx_float_tinyness_before false
96#endif
97
98// double
99
100// Default values. Should be overridden in configuration files if necessary.
101
102#ifndef __glibcxx_double_has_denorm_loss
103# define __glibcxx_double_has_denorm_loss false
104#endif
105#ifndef __glibcxx_double_traps
106# define __glibcxx_double_traps false
107#endif
108#ifndef __glibcxx_double_tinyness_before
109# define __glibcxx_double_tinyness_before false
110#endif
111
112// long double
113
114// Default values. Should be overridden in configuration files if necessary.
115
116#ifndef __glibcxx_long_double_has_denorm_loss
117# define __glibcxx_long_double_has_denorm_loss false
118#endif
119#ifndef __glibcxx_long_double_traps
120# define __glibcxx_long_double_traps false
121#endif
122#ifndef __glibcxx_long_double_tinyness_before
123# define __glibcxx_long_double_tinyness_before false
124#endif
125
126// You should not need to define any macros below this point.
127
128#define __glibcxx_signed_b(T,B)((T)(-1) < 0) ((T)(-1) < 0)
129
130#define __glibcxx_min_b(T,B)(((T)(-1) < 0) ? -(((T)(-1) < 0) ? (((((T)1 << ((B - ((T)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(T)0) - 1 : (T)0) \
131 (__glibcxx_signed_b (T,B)((T)(-1) < 0) ? -__glibcxx_max_b (T,B)(((T)(-1) < 0) ? (((((T)1 << ((B - ((T)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(T)0) - 1 : (T)0)
132
133#define __glibcxx_max_b(T,B)(((T)(-1) < 0) ? (((((T)1 << ((B - ((T)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(T)0) \
134 (__glibcxx_signed_b (T,B)((T)(-1) < 0) ? \
135 (((((T)1 << (__glibcxx_digits_b (T,B)(B - ((T)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(T)0)
136
137#define __glibcxx_digits_b(T,B)(B - ((T)(-1) < 0)) \
138 (B - __glibcxx_signed_b (T,B)((T)(-1) < 0))
139
140// The fraction 643/2136 approximates log10(2) to 7 significant digits.
141#define __glibcxx_digits10_b(T,B)((B - ((T)(-1) < 0)) * 643L / 2136) \
142 (__glibcxx_digits_b (T,B)(B - ((T)(-1) < 0)) * 643L / 2136)
143
144#define __glibcxx_signed(T) \
145 __glibcxx_signed_b (T, sizeof(T) * __CHAR_BIT__)((T)(-1) < 0)
146#define __glibcxx_min(T) \
147 __glibcxx_min_b (T, sizeof(T) * __CHAR_BIT__)(((T)(-1) < 0) ? -(((T)(-1) < 0) ? (((((T)1 << ((sizeof(T) * 8 - ((T)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(T)0) - 1 : (T)0)
148#define __glibcxx_max(T) \
149 __glibcxx_max_b (T, sizeof(T) * __CHAR_BIT__)(((T)(-1) < 0) ? (((((T)1 << ((sizeof(T) * 8 - ((T)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(T)0)
150#define __glibcxx_digits(T) \
151 __glibcxx_digits_b (T, sizeof(T) * __CHAR_BIT__)(sizeof(T) * 8 - ((T)(-1) < 0))
152#define __glibcxx_digits10(T) \
153 __glibcxx_digits10_b (T, sizeof(T) * __CHAR_BIT__)((sizeof(T) * 8 - ((T)(-1) < 0)) * 643L / 2136)
154
155#define __glibcxx_max_digits10(T) \
156 (2 + (T) * 643L / 2136)
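A quick standalone check of the 643/2136 approximation that the digits10 macros above rely on (values chosen for illustration): it reproduces the familiar digits10 figures for 32-bit and 64-bit integers.

static_assert((64 - 0) * 643L / 2136 == 19, "unsigned 64-bit: 19 decimal digits");
static_assert((32 - 1) * 643L / 2136 == 9, "signed 32-bit: 9 decimal digits");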
157
158namespace std _GLIBCXX_VISIBILITY(default)__attribute__ ((__visibility__ ("default")))
159{
160_GLIBCXX_BEGIN_NAMESPACE_VERSION
161
162 /**
163 * @brief Describes the rounding style for floating-point types.
164 *
165 * This is used in the std::numeric_limits class.
166 */
167 enum float_round_style
168 {
169 round_indeterminate = -1, /// Intermediate.
170 round_toward_zero = 0, /// To zero.
171 round_to_nearest = 1, /// To the nearest representable value.
172 round_toward_infinity = 2, /// To infinity.
173 round_toward_neg_infinity = 3 /// To negative infinity.
174 };
175
176 /**
177 * @brief Describes the denormalization for floating-point types.
178 *
179 * These values represent the presence or absence of a variable number
180 * of exponent bits. This type is used in the std::numeric_limits class.
181 */
182 enum float_denorm_style
183 {
184 /// Indeterminate at compile time whether denormalized values are allowed.
185 denorm_indeterminate = -1,
186 /// The type does not allow denormalized values.
187 denorm_absent = 0,
188 /// The type allows denormalized values.
189 denorm_present = 1
190 };
191
192 /**
193 * @brief Part of std::numeric_limits.
194 *
195 * The @c static @c const members are usable as integral constant
196 * expressions.
197 *
198 * @note This is a separate class for purposes of efficiency; you
199 * should only access these members as part of an instantiation
200 * of the std::numeric_limits class.
201 */
202 struct __numeric_limits_base
203 {
204 /** This will be true for all fundamental types (which have
205 specializations), and false for everything else. */
206 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = false;
207
208 /** The number of @c radix digits that can be represented without change: for
209 integer types, the number of non-sign bits in the mantissa; for
210 floating types, the number of @c radix digits in the mantissa. */
211 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = 0;
212
213 /** The number of base 10 digits that can be represented without change. */
214 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10 = 0;
215
216#if __cplusplus201703L >= 201103L
217 /** The number of base 10 digits required to ensure that values which
218 differ are always differentiated. */
219 static constexpr int max_digits10 = 0;
220#endif
221
222 /** True if the type is signed. */
223 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = false;
224
225 /** True if the type is integer. */
226 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = false;
227
228 /** True if the type uses an exact representation. All integer types are
229 exact, but not all exact types are integer. For example, rational and
230 fixed-exponent representations are exact but not integer. */
231 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = false;
232
233 /** For integer types, specifies the base of the representation. For
234 floating types, specifies the base of the exponent representation. */
235 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 0;
236
237 /** The minimum negative integer such that @c radix raised to the power of
238 (one less than that integer) is a normalized floating point number. */
239 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
240
241 /** The minimum negative integer such that 10 raised to that power is in
242 the range of normalized floating point numbers. */
243 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
244
245 /** The maximum positive integer such that @c radix raised to the power of
246 (one less than that integer) is a representable finite floating point
247 number. */
248 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
249
250 /** The maximum positive integer such that 10 raised to that power is in
251 the range of representable finite floating point numbers. */
252 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
253
254 /** True if the type has a representation for positive infinity. */
255 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
256
257 /** True if the type has a representation for a quiet (non-signaling)
258 Not a Number. */
259 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
260
261 /** True if the type has a representation for a signaling
262 Not a Number. */
263 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
264
265 /** See std::float_denorm_style for more information. */
266 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm = denorm_absent;
267
268 /** True if loss of accuracy is detected as a denormalization loss,
269 rather than as an inexact result. */
270 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
271
272 /** True if-and-only-if the type adheres to the IEC 559 standard, also
273 known as IEEE 754. (Only makes sense for floating point types.) */
274 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
275
276 /** True if the set of values representable by the type is
277 finite. All built-in types are bounded, this member would be
278 false for arbitrary precision types. */
279 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = false;
280
281 /** True if the type is @e modulo. A type is modulo if, for any
282 operation involving +, -, or * on values of that type whose
283 result would fall outside the range [min(),max()], the value
284 returned differs from the true value by an integer multiple of
285 max() - min() + 1. On most machines, this is false for floating
286 types, true for unsigned integers, and true for signed integers.
287 See PR22200 about signed integers. */
288 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = false;
289
290 /** True if trapping is implemented for this type. */
291 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = false;
292
293 /** True if tininess is detected before rounding. (see IEC 559) */
294 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
295
296 /** See std::float_round_style for more information. This is only
297 meaningful for floating types; integer types will all be
298 round_toward_zero. */
299 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style =
300 round_toward_zero;
301 };
302
303 /**
304 * @brief Properties of fundamental types.
305 *
306 * This class allows a program to obtain information about the
307 * representation of a fundamental type on a given platform. For
308 * non-fundamental types, the functions will return 0 and the data
309 * members will all be @c false.
310 */
311 template<typename _Tp>
312 struct numeric_limits : public __numeric_limits_base
313 {
314 /** The minimum finite value, or for floating types with
315 denormalization, the minimum positive normalized value. */
316 static _GLIBCXX_CONSTEXPRconstexpr _Tp
317 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return _Tp(); }
318
319 /** The maximum finite value. */
320 static _GLIBCXX_CONSTEXPRconstexpr _Tp
321 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return _Tp(); }
322
323#if __cplusplus201703L >= 201103L
324 /** A finite value x such that there is no other finite value y
325 * where y < x. */
326 static constexpr _Tp
327 lowest() noexcept { return _Tp(); }
328#endif
329
330 /** The @e machine @e epsilon: the difference between 1 and the least
331 value greater than 1 that is representable. */
332 static _GLIBCXX_CONSTEXPRconstexpr _Tp
333 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return _Tp(); }
334
335 /** The maximum rounding error measurement (see LIA-1). */
336 static _GLIBCXX_CONSTEXPRconstexpr _Tp
337 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return _Tp(); }
338
339 /** The representation of positive infinity, if @c has_infinity. */
340 static _GLIBCXX_CONSTEXPRconstexpr _Tp
341 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return _Tp(); }
342
343 /** The representation of a quiet Not a Number,
344 if @c has_quiet_NaN. */
345 static _GLIBCXX_CONSTEXPRconstexpr _Tp
346 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return _Tp(); }
347
348 /** The representation of a signaling Not a Number, if
349 @c has_signaling_NaN. */
350 static _GLIBCXX_CONSTEXPRconstexpr _Tp
351 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return _Tp(); }
352
353 /** The minimum positive denormalized value. For types where
354 @c has_denorm is false, this is the minimum positive normalized
355 value. */
356 static _GLIBCXX_CONSTEXPRconstexpr _Tp
357 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept { return _Tp(); }
358 };
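A brief usage note, sketched standalone: the members of the primary template and of the specializations below are usable in constant expressions, which is exactly how MathExtras.h consumes them.

#include <cstdint>
#include <limits>

static_assert(std::numeric_limits<std::uint8_t>::max() == 255, "8-bit maximum");
static_assert(std::numeric_limits<int>::is_integer &&
              std::numeric_limits<int>::is_signed, "int is a signed integer type");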
359
360 // _GLIBCXX_RESOLVE_LIB_DEFECTS
361 // 559. numeric_limits<const T>
362
363 template<typename _Tp>
364 struct numeric_limits<const _Tp>
365 : public numeric_limits<_Tp> { };
366
367 template<typename _Tp>
368 struct numeric_limits<volatile _Tp>
369 : public numeric_limits<_Tp> { };
370
371 template<typename _Tp>
372 struct numeric_limits<const volatile _Tp>
373 : public numeric_limits<_Tp> { };
374
375 // Now there follow 16 explicit specializations. Yes, 16. Make sure
376 // you get the count right. (18 in C++11 mode, with char16_t and char32_t.)
377 // (+1 if char8_t is enabled.)
378
379 // _GLIBCXX_RESOLVE_LIB_DEFECTS
380 // 184. numeric_limits<bool> wording problems
381
382 /// numeric_limits<bool> specialization.
383 template<>
384 struct numeric_limits<bool>
385 {
386 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
387
388 static _GLIBCXX_CONSTEXPRconstexpr bool
389 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return false; }
390
391 static _GLIBCXX_CONSTEXPRconstexpr bool
392 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return true; }
393
394#if __cplusplus201703L >= 201103L
395 static constexpr bool
396 lowest() noexcept { return min(); }
397#endif
398 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = 1;
399 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10 = 0;
400#if __cplusplus201703L >= 201103L
401 static constexpr int max_digits10 = 0;
402#endif
403 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = false;
404 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
405 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
406 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
407
408 static _GLIBCXX_CONSTEXPRconstexpr bool
409 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return false; }
410
411 static _GLIBCXX_CONSTEXPRconstexpr bool
412 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return false; }
413
414 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
415 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
416 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
417 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
418
419 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
420 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
421 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
422 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
423 = denorm_absent;
424 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
425
426 static _GLIBCXX_CONSTEXPRconstexpr bool
427 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return false; }
428
429 static _GLIBCXX_CONSTEXPRconstexpr bool
430 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return false; }
431
432 static _GLIBCXX_CONSTEXPRconstexpr bool
433 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return false; }
434
435 static _GLIBCXX_CONSTEXPRconstexpr bool
436 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept { return false; }
437
438 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
439 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
440 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = false;
441
442 // It is not clear what it means for a boolean type to trap.
443 // This is a DR on the LWG issue list. Here, I use integer
444 // promotion semantics.
445 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
446 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
447 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
448 = round_toward_zero;
449 };
450
451 /// numeric_limits<char> specialization.
452 template<>
453 struct numeric_limits<char>
454 {
455 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
456
457 static _GLIBCXX_CONSTEXPRconstexpr char
458 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return __glibcxx_min(char); }
459
460 static _GLIBCXX_CONSTEXPRconstexpr char
461 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __glibcxx_max(char); }
462
463#if __cplusplus201703L >= 201103L
464 static constexpr char
465 lowest() noexcept { return min(); }
466#endif
467
468 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = __glibcxx_digits (char);
469 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10 = __glibcxx_digits10 (char);
470#if __cplusplus201703L >= 201103L
471 static constexpr int max_digits10 = 0;
472#endif
473 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = __glibcxx_signed (char);
474 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
475 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
476 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
477
478 static _GLIBCXX_CONSTEXPRconstexpr char
479 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
480
481 static _GLIBCXX_CONSTEXPRconstexpr char
482 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
483
484 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
485 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
486 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
487 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
488
489 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
490 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
491 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
492 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
493 = denorm_absent;
494 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
495
496 static _GLIBCXX_CONSTEXPRconstexpr
497 char infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return char(); }
498
499 static _GLIBCXX_CONSTEXPRconstexpr char
500 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return char(); }
501
502 static _GLIBCXX_CONSTEXPRconstexpr char
503 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return char(); }
504
505 static _GLIBCXX_CONSTEXPRconstexpr char
506 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept { return static_cast<char>(0); }
507
508 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
509 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
510 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = !is_signed;
511
512 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
513 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
514 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
515 = round_toward_zero;
516 };
517
518 /// numeric_limits<signed char> specialization.
519 template<>
520 struct numeric_limits<signed char>
521 {
522 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
523
524 static _GLIBCXX_CONSTEXPRconstexpr signed char
525 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return -__SCHAR_MAX__127 - 1; }
526
527 static _GLIBCXX_CONSTEXPRconstexpr signed char
528 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __SCHAR_MAX__127; }
529
530#if __cplusplus201703L >= 201103L
531 static constexpr signed char
532 lowest() noexcept { return min(); }
533#endif
534
535 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = __glibcxx_digits (signed char);
536 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10
537 = __glibcxx_digits10 (signed char);
538#if __cplusplus201703L >= 201103L
539 static constexpr int max_digits10 = 0;
540#endif
541 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = true;
542 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
543 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
544 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
545
546 static _GLIBCXX_CONSTEXPRconstexpr signed char
547 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
548
549 static _GLIBCXX_CONSTEXPRconstexpr signed char
550 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
551
552 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
553 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
554 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
555 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
556
557 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
558 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
559 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
560 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
561 = denorm_absent;
562 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
563
564 static _GLIBCXX_CONSTEXPRconstexpr signed char
565 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return static_cast<signed char>(0); }
566
567 static _GLIBCXX_CONSTEXPRconstexpr signed char
568 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return static_cast<signed char>(0); }
569
570 static _GLIBCXX_CONSTEXPRconstexpr signed char
571 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept
572 { return static_cast<signed char>(0); }
573
574 static _GLIBCXX_CONSTEXPRconstexpr signed char
575 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept
576 { return static_cast<signed char>(0); }
577
578 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
579 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
580 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = false;
581
582 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
583 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
584 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
585 = round_toward_zero;
586 };
587
588 /// numeric_limits<unsigned char> specialization.
589 template<>
590 struct numeric_limits<unsigned char>
591 {
592 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
593
594 static _GLIBCXX_CONSTEXPRconstexpr unsigned char
595 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
596
597 static _GLIBCXX_CONSTEXPRconstexpr unsigned char
598 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __SCHAR_MAX__127 * 2U + 1; }
599
600#if __cplusplus201703L >= 201103L
601 static constexpr unsigned char
602 lowest() noexcept { return min(); }
603#endif
604
605 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits
606 = __glibcxx_digits (unsigned char);
607 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10
608 = __glibcxx_digits10 (unsigned char);
609#if __cplusplus201703L >= 201103L
610 static constexpr int max_digits10 = 0;
611#endif
612 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = false;
613 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
614 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
615 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
616
617 static _GLIBCXX_CONSTEXPRconstexpr unsigned char
618 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
619
620 static _GLIBCXX_CONSTEXPRconstexpr unsigned char
621 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
622
623 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
624 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
625 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
626 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
627
628 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
629 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
630 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
631 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
632 = denorm_absent;
633 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
634
635 static _GLIBCXX_CONSTEXPRconstexpr unsigned char
636 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept
637 { return static_cast<unsigned char>(0); }
638
639 static _GLIBCXX_CONSTEXPRconstexpr unsigned char
640 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept
641 { return static_cast<unsigned char>(0); }
642
643 static _GLIBCXX_CONSTEXPRconstexpr unsigned char
644 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept
645 { return static_cast<unsigned char>(0); }
646
647 static _GLIBCXX_CONSTEXPRconstexpr unsigned char
648 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept
649 { return static_cast<unsigned char>(0); }
650
651 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
652 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
653 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = true;
654
655 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
656 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
657 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
658 = round_toward_zero;
659 };
660
661 /// numeric_limits<wchar_t> specialization.
662 template<>
663 struct numeric_limits<wchar_t>
664 {
665 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
666
667 static _GLIBCXX_CONSTEXPRconstexpr wchar_t
668 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return __glibcxx_min (wchar_t); }
669
670 static _GLIBCXX_CONSTEXPRconstexpr wchar_t
671 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __glibcxx_max (wchar_t); }
672
673#if __cplusplus201703L >= 201103L
674 static constexpr wchar_t
675 lowest() noexcept { return min(); }
676#endif
677
678 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = __glibcxx_digits (wchar_t);
679 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10
680 = __glibcxx_digits10 (wchar_t);
681#if __cplusplus201703L >= 201103L
682 static constexpr int max_digits10 = 0;
683#endif
684 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = __glibcxx_signed (wchar_t);
685 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
686 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
687 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
688
689 static _GLIBCXX_CONSTEXPRconstexpr wchar_t
690 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
691
692 static _GLIBCXX_CONSTEXPRconstexpr wchar_t
693 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
694
695 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
696 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
697 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
698 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
699
700 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
701 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
702 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
703 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
704 = denorm_absent;
705 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
706
707 static _GLIBCXX_CONSTEXPRconstexpr wchar_t
708 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return wchar_t(); }
709
710 static _GLIBCXX_CONSTEXPRconstexpr wchar_t
711 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return wchar_t(); }
712
713 static _GLIBCXX_CONSTEXPRconstexpr wchar_t
714 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return wchar_t(); }
715
716 static _GLIBCXX_CONSTEXPRconstexpr wchar_t
717 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept { return wchar_t(); }
718
719 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
720 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
721 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = !is_signed;
722
723 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
724 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
725 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
726 = round_toward_zero;
727 };
728
729#if _GLIBCXX_USE_CHAR8_T
730 /// numeric_limits<char8_t> specialization.
731 template<>
732 struct numeric_limits<char8_t>
733 {
734 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
735
736 static _GLIBCXX_CONSTEXPRconstexpr char8_t
737 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return __glibcxx_min (char8_t); }
738
739 static _GLIBCXX_CONSTEXPRconstexpr char8_t
740 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __glibcxx_max (char8_t); }
741
742 static _GLIBCXX_CONSTEXPRconstexpr char8_t
743 lowest() _GLIBCXX_USE_NOEXCEPTnoexcept { return min(); }
744
745 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = __glibcxx_digits (char8_t);
746 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10 = __glibcxx_digits10 (char8_t);
747 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_digits10 = 0;
748 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = __glibcxx_signed (char8_t);
749 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
750 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
751 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
752
753 static _GLIBCXX_CONSTEXPRconstexpr char8_t
754 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
755
756 static _GLIBCXX_CONSTEXPRconstexpr char8_t
757 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
758
759 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
760 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
761 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
762 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
763
764 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
765 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
766 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
767 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
768 = denorm_absent;
769 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
770
771 static _GLIBCXX_CONSTEXPRconstexpr char8_t
772 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return char8_t(); }
773
774 static _GLIBCXX_CONSTEXPRconstexpr char8_t
775 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return char8_t(); }
776
777 static _GLIBCXX_CONSTEXPRconstexpr char8_t
778 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return char8_t(); }
779
780 static _GLIBCXX_CONSTEXPRconstexpr char8_t
781 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept { return char8_t(); }
782
783 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
784 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
785 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = !is_signed;
786
787 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
788 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
789 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
790 = round_toward_zero;
791 };
792#endif
793
794#if __cplusplus201703L >= 201103L
795 /// numeric_limits<char16_t> specialization.
796 template<>
797 struct numeric_limits<char16_t>
798 {
799 static constexpr bool is_specialized = true;
800
801 static constexpr char16_t
802 min() noexcept { return __glibcxx_min (char16_t); }
803
804 static constexpr char16_t
805 max() noexcept { return __glibcxx_max (char16_t); }
806
807 static constexpr char16_t
808 lowest() noexcept { return min(); }
809
810 static constexpr int digits = __glibcxx_digits (char16_t);
811 static constexpr int digits10 = __glibcxx_digits10 (char16_t);
812 static constexpr int max_digits10 = 0;
813 static constexpr bool is_signed = __glibcxx_signed (char16_t);
814 static constexpr bool is_integer = true;
815 static constexpr bool is_exact = true;
816 static constexpr int radix = 2;
817
818 static constexpr char16_t
819 epsilon() noexcept { return 0; }
820
821 static constexpr char16_t
822 round_error() noexcept { return 0; }
823
824 static constexpr int min_exponent = 0;
825 static constexpr int min_exponent10 = 0;
826 static constexpr int max_exponent = 0;
827 static constexpr int max_exponent10 = 0;
828
829 static constexpr bool has_infinity = false;
830 static constexpr bool has_quiet_NaN = false;
831 static constexpr bool has_signaling_NaN = false;
832 static constexpr float_denorm_style has_denorm = denorm_absent;
833 static constexpr bool has_denorm_loss = false;
834
835 static constexpr char16_t
836 infinity() noexcept { return char16_t(); }
837
838 static constexpr char16_t
839 quiet_NaN() noexcept { return char16_t(); }
840
841 static constexpr char16_t
842 signaling_NaN() noexcept { return char16_t(); }
843
844 static constexpr char16_t
845 denorm_min() noexcept { return char16_t(); }
846
847 static constexpr bool is_iec559 = false;
848 static constexpr bool is_bounded = true;
849 static constexpr bool is_modulo = !is_signed;
850
851 static constexpr bool traps = __glibcxx_integral_trapstrue;
852 static constexpr bool tinyness_before = false;
853 static constexpr float_round_style round_style = round_toward_zero;
854 };
855
856 /// numeric_limits<char32_t> specialization.
857 template<>
858 struct numeric_limits<char32_t>
859 {
860 static constexpr bool is_specialized = true;
861
862 static constexpr char32_t
863 min() noexcept { return __glibcxx_min (char32_t); }
864
865 static constexpr char32_t
866 max() noexcept { return __glibcxx_max (char32_t); }
867
868 static constexpr char32_t
869 lowest() noexcept { return min(); }
870
871 static constexpr int digits = __glibcxx_digits (char32_t);
872 static constexpr int digits10 = __glibcxx_digits10 (char32_t);
873 static constexpr int max_digits10 = 0;
874 static constexpr bool is_signed = __glibcxx_signed (char32_t);
875 static constexpr bool is_integer = true;
876 static constexpr bool is_exact = true;
877 static constexpr int radix = 2;
878
879 static constexpr char32_t
880 epsilon() noexcept { return 0; }
881
882 static constexpr char32_t
883 round_error() noexcept { return 0; }
884
885 static constexpr int min_exponent = 0;
886 static constexpr int min_exponent10 = 0;
887 static constexpr int max_exponent = 0;
888 static constexpr int max_exponent10 = 0;
889
890 static constexpr bool has_infinity = false;
891 static constexpr bool has_quiet_NaN = false;
892 static constexpr bool has_signaling_NaN = false;
893 static constexpr float_denorm_style has_denorm = denorm_absent;
894 static constexpr bool has_denorm_loss = false;
895
896 static constexpr char32_t
897 infinity() noexcept { return char32_t(); }
898
899 static constexpr char32_t
900 quiet_NaN() noexcept { return char32_t(); }
901
902 static constexpr char32_t
903 signaling_NaN() noexcept { return char32_t(); }
904
905 static constexpr char32_t
906 denorm_min() noexcept { return char32_t(); }
907
908 static constexpr bool is_iec559 = false;
909 static constexpr bool is_bounded = true;
910 static constexpr bool is_modulo = !is_signed;
911
912 static constexpr bool traps = __glibcxx_integral_trapstrue;
913 static constexpr bool tinyness_before = false;
914 static constexpr float_round_style round_style = round_toward_zero;
915 };
916#endif
917
918 /// numeric_limits<short> specialization.
919 template<>
920 struct numeric_limits<short>
921 {
922 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
923
924 static _GLIBCXX_CONSTEXPRconstexpr short
925 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return -__SHRT_MAX__32767 - 1; }
926
927 static _GLIBCXX_CONSTEXPRconstexpr short
928 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __SHRT_MAX__32767; }
929
930#if __cplusplus201703L >= 201103L
931 static constexpr short
932 lowest() noexcept { return min(); }
933#endif
934
935 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = __glibcxx_digits (short);
936 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10 = __glibcxx_digits10 (short);
937#if __cplusplus201703L >= 201103L
938 static constexpr int max_digits10 = 0;
939#endif
940 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = true;
941 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
942 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
943 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
944
945 static _GLIBCXX_CONSTEXPRconstexpr short
946 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
947
948 static _GLIBCXX_CONSTEXPRconstexpr short
949 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
950
951 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
952 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
953 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
954 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
955
956 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
957 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
958 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
959 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
960 = denorm_absent;
961 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
962
963 static _GLIBCXX_CONSTEXPRconstexpr short
964 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return short(); }
965
966 static _GLIBCXX_CONSTEXPRconstexpr short
967 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return short(); }
968
969 static _GLIBCXX_CONSTEXPRconstexpr short
970 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return short(); }
971
972 static _GLIBCXX_CONSTEXPRconstexpr short
973 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept { return short(); }
974
975 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
976 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
977 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = false;
978
979 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
980 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
981 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
982 = round_toward_zero;
983 };
984
985 /// numeric_limits<unsigned short> specialization.
986 template<>
987 struct numeric_limits<unsigned short>
988 {
989 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
990
991 static _GLIBCXX_CONSTEXPRconstexpr unsigned short
992 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
993
994 static _GLIBCXX_CONSTEXPRconstexpr unsigned short
995 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __SHRT_MAX__32767 * 2U + 1; }
996
997#if __cplusplus201703L >= 201103L
998 static constexpr unsigned short
999 lowest() noexcept { return min(); }
1000#endif
1001
1002 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits
1003 = __glibcxx_digits (unsigned short);
1004 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10
1005 = __glibcxx_digits10 (unsigned short);
1006#if __cplusplus201703L >= 201103L
1007 static constexpr int max_digits10 = 0;
1008#endif
1009 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = false;
1010 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
1011 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
1012 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
1013
1014 static _GLIBCXX_CONSTEXPRconstexpr unsigned short
1015 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1016
1017 static _GLIBCXX_CONSTEXPRconstexpr unsigned short
1018 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1019
1020 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
1021 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
1022 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
1023 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
1024
1025 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
1026 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
1027 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
1028 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
1029 = denorm_absent;
1030 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
1031
1032 static _GLIBCXX_CONSTEXPRconstexpr unsigned short
1033 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept
1034 { return static_cast<unsigned short>(0); }
1035
1036 static _GLIBCXX_CONSTEXPRconstexpr unsigned short
1037 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept
1038 { return static_cast<unsigned short>(0); }
1039
1040 static _GLIBCXX_CONSTEXPRconstexpr unsigned short
1041 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept
1042 { return static_cast<unsigned short>(0); }
1043
1044 static _GLIBCXX_CONSTEXPRconstexpr unsigned short
1045 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept
1046 { return static_cast<unsigned short>(0); }
1047
1048 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
1049 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
1050 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = true;
1051
1052 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
1053 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
1054 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
1055 = round_toward_zero;
1056 };
1057
1058 /// numeric_limits<int> specialization.
1059 template<>
1060 struct numeric_limits<int>
1061 {
1062 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
1063
1064 static _GLIBCXX_CONSTEXPRconstexpr int
1065 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return -__INT_MAX__2147483647 - 1; }
1066
1067 static _GLIBCXX_CONSTEXPRconstexpr int
1068 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __INT_MAX__2147483647; }
1069
1070#if __cplusplus201703L >= 201103L
1071 static constexpr int
1072 lowest() noexcept { return min(); }
1073#endif
1074
1075 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = __glibcxx_digits (int);
1076 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10 = __glibcxx_digits10 (int);
1077#if __cplusplus201703L >= 201103L
1078 static constexpr int max_digits10 = 0;
1079#endif
1080 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = true;
1081 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
1082 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
1083 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
1084
1085 static _GLIBCXX_CONSTEXPRconstexpr int
1086 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1087
1088 static _GLIBCXX_CONSTEXPRconstexpr int
1089 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1090
1091 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
1092 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
1093 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
1094 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
1095
1096 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
1097 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
1098 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
1099 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
1100 = denorm_absent;
1101 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
1102
1103 static _GLIBCXX_CONSTEXPRconstexpr int
1104 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return static_cast<int>(0); }
1105
1106 static _GLIBCXX_CONSTEXPRconstexpr int
1107 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return static_cast<int>(0); }
1108
1109 static _GLIBCXX_CONSTEXPRconstexpr int
1110 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return static_cast<int>(0); }
1111
1112 static _GLIBCXX_CONSTEXPRconstexpr int
1113 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept { return static_cast<int>(0); }
1114
1115 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
1116 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
1117 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = false;
1118
1119 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
1120 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
1121 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
1122 = round_toward_zero;
1123 };
1124
1125 /// numeric_limits<unsigned int> specialization.
1126 template<>
1127 struct numeric_limits<unsigned int>
1128 {
1129 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
1130
1131 static _GLIBCXX_CONSTEXPRconstexpr unsigned int
1132 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1133
1134 static _GLIBCXX_CONSTEXPRconstexpr unsigned int
1135 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __INT_MAX__2147483647 * 2U + 1; }
1136
1137#if __cplusplus201703L >= 201103L
1138 static constexpr unsigned int
1139 lowest() noexcept { return min(); }
1140#endif
1141
1142 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits
1143 = __glibcxx_digits (unsigned int);
1144 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10
1145 = __glibcxx_digits10 (unsigned int);
1146#if __cplusplus201703L >= 201103L
1147 static constexpr int max_digits10 = 0;
1148#endif
1149 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = false;
1150 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
1151 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
1152 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
1153
1154 static _GLIBCXX_CONSTEXPRconstexpr unsigned int
1155 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1156
1157 static _GLIBCXX_CONSTEXPRconstexpr unsigned int
1158 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1159
1160 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
1161 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
1162 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
1163 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
1164
1165 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
1166 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
1167 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
1168 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
1169 = denorm_absent;
1170 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
1171
1172 static _GLIBCXX_CONSTEXPRconstexpr unsigned int
1173 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return static_cast<unsigned int>(0); }
1174
1175 static _GLIBCXX_CONSTEXPRconstexpr unsigned int
1176 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept
1177 { return static_cast<unsigned int>(0); }
1178
1179 static _GLIBCXX_CONSTEXPRconstexpr unsigned int
1180 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept
1181 { return static_cast<unsigned int>(0); }
1182
1183 static _GLIBCXX_CONSTEXPRconstexpr unsigned int
1184 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept
1185 { return static_cast<unsigned int>(0); }
1186
1187 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
1188 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
1189 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = true;
1190
1191 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
1192 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
1193 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
1194 = round_toward_zero;
1195 };
1196
1197 /// numeric_limits<long> specialization.
1198 template<>
1199 struct numeric_limits<long>
1200 {
1201 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
1202
1203 static _GLIBCXX_CONSTEXPRconstexpr long
1204 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return -__LONG_MAX__9223372036854775807L - 1; }
1205
1206 static _GLIBCXX_CONSTEXPRconstexpr long
1207 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __LONG_MAX__9223372036854775807L; }
1208
1209#if __cplusplus201703L >= 201103L
1210 static constexpr long
1211 lowest() noexcept { return min(); }
1212#endif
1213
1214 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = __glibcxx_digits (long);
1215 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10 = __glibcxx_digits10 (long);
1216#if __cplusplus201703L >= 201103L
1217 static constexpr int max_digits10 = 0;
1218#endif
1219 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = true;
1220 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
1221 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
1222 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
1223
1224 static _GLIBCXX_CONSTEXPRconstexpr long
1225 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1226
1227 static _GLIBCXX_CONSTEXPRconstexpr long
1228 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1229
1230 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
1231 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
1232 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
1233 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
1234
1235 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
1236 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
1237 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
1238 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
1239 = denorm_absent;
1240 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
1241
1242 static _GLIBCXX_CONSTEXPRconstexpr long
1243 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return static_cast<long>(0); }
1244
1245 static _GLIBCXX_CONSTEXPRconstexpr long
1246 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return static_cast<long>(0); }
1247
1248 static _GLIBCXX_CONSTEXPRconstexpr long
1249 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return static_cast<long>(0); }
1250
1251 static _GLIBCXX_CONSTEXPRconstexpr long
1252 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept { return static_cast<long>(0); }
1253
1254 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
1255 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
1256 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = false;
1257
1258 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
1259 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
1260 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
1261 = round_toward_zero;
1262 };
1263
1264 /// numeric_limits<unsigned long> specialization.
1265 template<>
1266 struct numeric_limits<unsigned long>
1267 {
1268 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
1269
1270 static _GLIBCXX_CONSTEXPRconstexpr unsigned long
1271 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1272
1273 static _GLIBCXX_CONSTEXPRconstexpr unsigned long
1274 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __LONG_MAX__9223372036854775807L * 2UL + 1; }
16. Returning the value 18446744073709551615
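    // Note (not part of the libstdc++ header): on an LP64 target, where
    // unsigned long is 64 bits wide, numeric_limits<unsigned long>::max()
    // evaluates to 2^64 - 1, i.e. the 18446744073709551615 reported in the
    // analyzer step above. In C++ a shift whose count is greater than or
    // equal to the bit width of the promoted left operand is undefined
    // behavior, so a 64-bit shift count that can reach this value has to be
    // range-checked first (a guarded-shift sketch follows the end of this
    // header).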
1275
1276#if __cplusplus201703L >= 201103L
1277 static constexpr unsigned long
1278 lowest() noexcept { return min(); }
1279#endif
1280
1281 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits
1282 = __glibcxx_digits (unsigned long);
1283 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10
1284 = __glibcxx_digits10 (unsigned long);
1285#if __cplusplus201703L >= 201103L
1286 static constexpr int max_digits10 = 0;
1287#endif
1288 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = false;
1289 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
1290 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
1291 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
1292
1293 static _GLIBCXX_CONSTEXPRconstexpr unsigned long
1294 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1295
1296 static _GLIBCXX_CONSTEXPRconstexpr unsigned long
1297 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1298
1299 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
1300 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
1301 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
1302 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
1303
1304 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
1305 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
1306 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
1307 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
1308 = denorm_absent;
1309 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
1310
1311 static _GLIBCXX_CONSTEXPRconstexpr unsigned long
1312 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept
1313 { return static_cast<unsigned long>(0); }
1314
1315 static _GLIBCXX_CONSTEXPRconstexpr unsigned long
1316 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept
1317 { return static_cast<unsigned long>(0); }
1318
1319 static _GLIBCXX_CONSTEXPRconstexpr unsigned long
1320 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept
1321 { return static_cast<unsigned long>(0); }
1322
1323 static _GLIBCXX_CONSTEXPRconstexpr unsigned long
1324 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept
1325 { return static_cast<unsigned long>(0); }
1326
1327 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
1328 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
1329 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = true;
1330
1331 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
1332 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
1333 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
1334 = round_toward_zero;
1335 };
1336
1337 /// numeric_limits<long long> specialization.
1338 template<>
1339 struct numeric_limits<long long>
1340 {
1341 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
1342
1343 static _GLIBCXX_CONSTEXPRconstexpr long long
1344 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return -__LONG_LONG_MAX__9223372036854775807LL - 1; }
1345
1346 static _GLIBCXX_CONSTEXPRconstexpr long long
1347 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __LONG_LONG_MAX__9223372036854775807LL; }
1348
1349#if __cplusplus201703L >= 201103L
1350 static constexpr long long
1351 lowest() noexcept { return min(); }
1352#endif
1353
1354 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits
1355 = __glibcxx_digits (long long);
1356 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10
1357 = __glibcxx_digits10 (long long);
1358#if __cplusplus201703L >= 201103L
1359 static constexpr int max_digits10 = 0;
1360#endif
1361 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = true;
1362 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
1363 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
1364 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
1365
1366 static _GLIBCXX_CONSTEXPRconstexpr long long
1367 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1368
1369 static _GLIBCXX_CONSTEXPRconstexpr long long
1370 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1371
1372 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
1373 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
1374 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
1375 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
1376
1377 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
1378 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
1379 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
1380 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
1381 = denorm_absent;
1382 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
1383
1384 static _GLIBCXX_CONSTEXPRconstexpr long long
1385 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return static_cast<long long>(0); }
1386
1387 static _GLIBCXX_CONSTEXPRconstexpr long long
1388 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return static_cast<long long>(0); }
1389
1390 static _GLIBCXX_CONSTEXPRconstexpr long long
1391 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept
1392 { return static_cast<long long>(0); }
1393
1394 static _GLIBCXX_CONSTEXPRconstexpr long long
1395 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept { return static_cast<long long>(0); }
1396
1397 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
1398 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
1399 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = false;
1400
1401 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
1402 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
1403 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
1404 = round_toward_zero;
1405 };
1406
1407 /// numeric_limits<unsigned long long> specialization.
1408 template<>
1409 struct numeric_limits<unsigned long long>
1410 {
1411 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
1412
1413 static _GLIBCXX_CONSTEXPRconstexpr unsigned long long
1414 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1415
1416 static _GLIBCXX_CONSTEXPRconstexpr unsigned long long
1417 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __LONG_LONG_MAX__9223372036854775807LL * 2ULL + 1; }
1418
1419#if __cplusplus201703L >= 201103L
1420 static constexpr unsigned long long
1421 lowest() noexcept { return min(); }
1422#endif
1423
1424 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits
1425 = __glibcxx_digits (unsigned long long);
1426 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10
1427 = __glibcxx_digits10 (unsigned long long);
1428#if __cplusplus201703L >= 201103L
1429 static constexpr int max_digits10 = 0;
1430#endif
1431 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = false;
1432 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
1433 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
1434 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
1435
1436 static _GLIBCXX_CONSTEXPRconstexpr unsigned long long
1437 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1438
1439 static _GLIBCXX_CONSTEXPRconstexpr unsigned long long
1440 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1441
1442 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
1443 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
1444 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
1445 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
1446
1447 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
1448 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
1449 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
1450 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
1451 = denorm_absent;
1452 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
1453
1454 static _GLIBCXX_CONSTEXPRconstexpr unsigned long long
1455 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept
1456 { return static_cast<unsigned long long>(0); }
1457
1458 static _GLIBCXX_CONSTEXPRconstexpr unsigned long long
1459 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept
1460 { return static_cast<unsigned long long>(0); }
1461
1462 static _GLIBCXX_CONSTEXPRconstexpr unsigned long long
1463 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept
1464 { return static_cast<unsigned long long>(0); }
1465
1466 static _GLIBCXX_CONSTEXPRconstexpr unsigned long long
1467 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept
1468 { return static_cast<unsigned long long>(0); }
1469
1470 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
1471 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
1472 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = true;
1473
1474 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
1475 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
1476 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
1477 = round_toward_zero;
1478 };
1479
1480#define __INT_N(TYPE, BITSIZE, EXT, UEXT) \
1481 template<> \
1482 struct numeric_limits<TYPE> \
1483 { \
1484 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true; \
1485 \
1486 static _GLIBCXX_CONSTEXPRconstexpr TYPE \
1487	      min() _GLIBCXX_USE_NOEXCEPTnoexcept { return __glibcxx_min_b (TYPE, BITSIZE)(((TYPE)(-1) < 0) ? -(((TYPE)(-1) < 0) ? (((((TYPE)1 << ((BITSIZE - ((TYPE)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(TYPE)0) - 1 : (TYPE)0); } \
1488 \
1489 static _GLIBCXX_CONSTEXPRconstexpr TYPE \
1490	      max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __glibcxx_max_b (TYPE, BITSIZE)(((TYPE)(-1) < 0) ? (((((TYPE)1 << ((BITSIZE - ((TYPE)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(TYPE)0); } \
1491 \
1492 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits \
1493 = BITSIZE - 1; \
1494 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10 \
1495 = (BITSIZE - 1) * 643L / 2136; \
1496 \
1497 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = true; \
1498 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true; \
1499 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true; \
1500 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2; \
1501 \
1502 static _GLIBCXX_CONSTEXPRconstexpr TYPE \
1503 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; } \
1504 \
1505 static _GLIBCXX_CONSTEXPRconstexpr TYPE \
1506 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; } \
1507 \
1508 EXT \
1509 \
1510 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0; \
1511 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0; \
1512 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0; \
1513 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0; \
1514 \
1515 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false; \
1516 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false; \
1517 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false; \
1518 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm \
1519 = denorm_absent; \
1520 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false; \
1521 \
1522 static _GLIBCXX_CONSTEXPRconstexpr TYPE \
1523 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept \
1524 { return static_cast<TYPE>(0); } \
1525 \
1526 static _GLIBCXX_CONSTEXPRconstexpr TYPE \
1527 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept \
1528 { return static_cast<TYPE>(0); } \
1529 \
1530 static _GLIBCXX_CONSTEXPRconstexpr TYPE \
1531 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept \
1532 { return static_cast<TYPE>(0); } \
1533 \
1534 static _GLIBCXX_CONSTEXPRconstexpr TYPE \
1535 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept \
1536 { return static_cast<TYPE>(0); } \
1537 \
1538 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false; \
1539 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true; \
1540 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = false; \
1541 \
1542 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps \
1543 = __glibcxx_integral_trapstrue; \
1544 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false; \
1545 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style \
1546 = round_toward_zero; \
1547 }; \
1548 \
1549 template<> \
1550 struct numeric_limits<unsigned TYPE> \
1551 { \
1552 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true; \
1553 \
1554 static _GLIBCXX_CONSTEXPRconstexpr unsigned TYPE \
1555 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; } \
1556 \
1557 static _GLIBCXX_CONSTEXPRconstexpr unsigned TYPE \
1558 max() _GLIBCXX_USE_NOEXCEPTnoexcept \
1559	      { return __glibcxx_max_b (unsigned TYPE, BITSIZE)(((unsigned TYPE)(-1) < 0) ? (((((unsigned TYPE)1 << ((BITSIZE - ((unsigned TYPE)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(unsigned TYPE)0); } \
1560 \
1561 UEXT \
1562 \
1563 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits \
1564 = BITSIZE; \
1565 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10 \
1566 = BITSIZE * 643L / 2136; \
1567 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = false; \
1568 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true; \
1569 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true; \
1570 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2; \
1571 \
1572 static _GLIBCXX_CONSTEXPRconstexpr unsigned TYPE \
1573 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; } \
1574 \
1575 static _GLIBCXX_CONSTEXPRconstexpr unsigned TYPE \
1576 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; } \
1577 \
1578 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0; \
1579 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0; \
1580 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0; \
1581 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0; \
1582 \
1583 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false; \
1584 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false; \
1585 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false; \
1586 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm \
1587 = denorm_absent; \
1588 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false; \
1589 \
1590 static _GLIBCXX_CONSTEXPRconstexpr unsigned TYPE \
1591 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept \
1592 { return static_cast<unsigned TYPE>(0); } \
1593 \
1594 static _GLIBCXX_CONSTEXPRconstexpr unsigned TYPE \
1595 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept \
1596 { return static_cast<unsigned TYPE>(0); } \
1597 \
1598 static _GLIBCXX_CONSTEXPRconstexpr unsigned TYPE \
1599 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept \
1600 { return static_cast<unsigned TYPE>(0); } \
1601 \
1602 static _GLIBCXX_CONSTEXPRconstexpr unsigned TYPE \
1603 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept \
1604 { return static_cast<unsigned TYPE>(0); } \
1605 \
1606 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false; \
1607 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true; \
1608 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = true; \
1609 \
1610 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue; \
1611 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false; \
1612 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style \
1613 = round_toward_zero; \
1614 };
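    // A brief gloss on the __INT_N helpers above (illustrative comments only,
    // assuming a two's-complement representation of TYPE):
    //   __glibcxx_max_b(TYPE, BITSIZE)   ==  2^(BITSIZE-1) - 1  for signed TYPE
    //                                    ==  2^BITSIZE - 1      for unsigned TYPE
    //   __glibcxx_min_b(TYPE, BITSIZE)   == -2^(BITSIZE-1)      for signed TYPE
    //   e.g. for BITSIZE == 128 the signed max is
    //   170141183460469231731687303715884105727.
    // digits10 uses the factor 643L / 2136 because 643/2136 ~= log10(2), so it
    // yields floor((BITSIZE - 1) * log10(2)) decimal digits in the signed case
    // and floor(BITSIZE * log10(2)) in the unsigned case.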
1615
1616#if __cplusplus201703L >= 201103L
1617
1618#define __INT_N_201103(TYPE) \
1619 static constexpr TYPE \
1620 lowest() noexcept { return min(); } \
1621 static constexpr int max_digits10 = 0;
1622
1623#define __INT_N_U201103(TYPE) \
1624 static constexpr unsigned TYPE \
1625 lowest() noexcept { return min(); } \
1626 static constexpr int max_digits10 = 0;
1627
1628#else
1629#define __INT_N_201103(TYPE)
1630#define __INT_N_U201103(TYPE)
1631#endif
1632
1633#if !defined(__STRICT_ANSI__1)
1634#ifdef __GLIBCXX_TYPE_INT_N_0
1635 __INT_N(__GLIBCXX_TYPE_INT_N_0, __GLIBCXX_BITSIZE_INT_N_0,
1636 __INT_N_201103 (__GLIBCXX_TYPE_INT_N_0), __INT_N_U201103 (__GLIBCXX_TYPE_INT_N_0))
1637#endif
1638#ifdef __GLIBCXX_TYPE_INT_N_1
1639 __INT_N (__GLIBCXX_TYPE_INT_N_1, __GLIBCXX_BITSIZE_INT_N_1,
1640 __INT_N_201103 (__GLIBCXX_TYPE_INT_N_1), __INT_N_U201103 (__GLIBCXX_TYPE_INT_N_1))
1641#endif
1642#ifdef __GLIBCXX_TYPE_INT_N_2
1643 __INT_N (__GLIBCXX_TYPE_INT_N_2, __GLIBCXX_BITSIZE_INT_N_2,
1644 __INT_N_201103 (__GLIBCXX_TYPE_INT_N_2), __INT_N_U201103 (__GLIBCXX_TYPE_INT_N_2))
1645#endif
1646#ifdef __GLIBCXX_TYPE_INT_N_3
1647 __INT_N (__GLIBCXX_TYPE_INT_N_3, __GLIBCXX_BITSIZE_INT_N_3,
1648 __INT_N_201103 (__GLIBCXX_TYPE_INT_N_3), __INT_N_U201103 (__GLIBCXX_TYPE_INT_N_3))
1649#endif
1650
1651#elif defined __STRICT_ANSI__1 && defined __SIZEOF_INT128__16
1652 __INT_N(__int128, 128,
1653 __INT_N_201103 (__int128),
1654 __INT_N_U201103 (__int128))
1655#endif
1656
1657#undef __INT_N
1658#undef __INT_N_201103
1659#undef __INT_N_U201103
1660
1661
1662 /// numeric_limits<float> specialization.
1663 template<>
1664 struct numeric_limits<float>
1665 {
1666 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
1667
1668 static _GLIBCXX_CONSTEXPRconstexpr float
1669 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return __FLT_MIN__1.17549435e-38F; }
1670
1671 static _GLIBCXX_CONSTEXPRconstexpr float
1672 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __FLT_MAX__3.40282347e+38F; }
1673
1674#if __cplusplus201703L >= 201103L
1675 static constexpr float
1676 lowest() noexcept { return -__FLT_MAX__3.40282347e+38F; }
1677#endif
1678
1679 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = __FLT_MANT_DIG__24;
1680 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10 = __FLT_DIG__6;
1681#if __cplusplus201703L >= 201103L
1682 static constexpr int max_digits10
1683 = __glibcxx_max_digits10 (__FLT_MANT_DIG__24);
1684#endif
1685 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = true;
1686 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = false;
1687 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = false;
1688 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = __FLT_RADIX__2;
1689
1690 static _GLIBCXX_CONSTEXPRconstexpr float
1691 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return __FLT_EPSILON__1.19209290e-7F; }
1692
1693 static _GLIBCXX_CONSTEXPRconstexpr float
1694 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0.5F; }
1695
1696 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = __FLT_MIN_EXP__(-125);
1697 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = __FLT_MIN_10_EXP__(-37);
1698 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = __FLT_MAX_EXP__128;
1699 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = __FLT_MAX_10_EXP__38;
1700
1701 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = __FLT_HAS_INFINITY__1;
1702 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = __FLT_HAS_QUIET_NAN__1;
1703 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = has_quiet_NaN;
1704 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
1705 = bool(__FLT_HAS_DENORM__1) ? denorm_present : denorm_absent;
1706 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss
1707 = __glibcxx_float_has_denorm_loss;
1708
1709 static _GLIBCXX_CONSTEXPRconstexpr float
1710 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return __builtin_huge_valf(); }
1711
1712 static _GLIBCXX_CONSTEXPRconstexpr float
1713 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return __builtin_nanf(""); }
1714
1715 static _GLIBCXX_CONSTEXPRconstexpr float
1716 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return __builtin_nansf(""); }
1717
1718 static _GLIBCXX_CONSTEXPRconstexpr float
1719 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept { return __FLT_DENORM_MIN__1.40129846e-45F; }
1720
1721 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559
1722 = has_infinity && has_quiet_NaN && has_denorm == denorm_present;
1723 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
1724 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = false;
1725
1726 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_float_traps;
1727 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before
1728 = __glibcxx_float_tinyness_before;
1729 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
1730 = round_to_nearest;
1731 };
1732
1733#undef __glibcxx_float_has_denorm_loss
1734#undef __glibcxx_float_traps
1735#undef __glibcxx_float_tinyness_before
1736
1737 /// numeric_limits<double> specialization.
1738 template<>
1739 struct numeric_limits<double>
1740 {
1741 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
1742
1743 static _GLIBCXX_CONSTEXPRconstexpr double
1744 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return __DBL_MIN__2.2250738585072014e-308; }
1745
1746 static _GLIBCXX_CONSTEXPRconstexpr double
1747 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __DBL_MAX__1.7976931348623157e+308; }
1748
1749#if __cplusplus201703L >= 201103L
1750 static constexpr double
1751 lowest() noexcept { return -__DBL_MAX__1.7976931348623157e+308; }
1752#endif
1753
1754 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = __DBL_MANT_DIG__53;
1755 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10 = __DBL_DIG__15;
1756#if __cplusplus201703L >= 201103L
1757 static constexpr int max_digits10
1758 = __glibcxx_max_digits10 (__DBL_MANT_DIG__53);
1759#endif
1760 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = true;
1761 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = false;
1762 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = false;
1763 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = __FLT_RADIX__2;
1764
1765 static _GLIBCXX_CONSTEXPRconstexpr double
1766 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return __DBL_EPSILON__2.2204460492503131e-16; }
1767
1768 static _GLIBCXX_CONSTEXPRconstexpr double
1769 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0.5; }
1770
1771 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = __DBL_MIN_EXP__(-1021);
1772 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = __DBL_MIN_10_EXP__(-307);
1773 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = __DBL_MAX_EXP__1024;
1774 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = __DBL_MAX_10_EXP__308;
1775
1776 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = __DBL_HAS_INFINITY__1;
1777 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = __DBL_HAS_QUIET_NAN__1;
1778 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = has_quiet_NaN;
1779 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
1780 = bool(__DBL_HAS_DENORM__1) ? denorm_present : denorm_absent;
1781 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss
1782 = __glibcxx_double_has_denorm_loss;
1783
1784 static _GLIBCXX_CONSTEXPRconstexpr double
1785 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return __builtin_huge_val(); }
1786
1787 static _GLIBCXX_CONSTEXPRconstexpr double
1788 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return __builtin_nan(""); }
1789
1790 static _GLIBCXX_CONSTEXPRconstexpr double
1791 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return __builtin_nans(""); }
1792
1793 static _GLIBCXX_CONSTEXPRconstexpr double
1794 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept { return __DBL_DENORM_MIN__4.9406564584124654e-324; }
1795
1796 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559
1797 = has_infinity && has_quiet_NaN && has_denorm == denorm_present;
1798 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
1799 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = false;
1800
1801 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_double_traps;
1802 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before
1803 = __glibcxx_double_tinyness_before;
1804 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
1805 = round_to_nearest;
1806 };
1807
1808#undef __glibcxx_double_has_denorm_loss
1809#undef __glibcxx_double_traps
1810#undef __glibcxx_double_tinyness_before
1811
1812 /// numeric_limits<long double> specialization.
1813 template<>
1814 struct numeric_limits<long double>
1815 {
1816 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
1817
1818 static _GLIBCXX_CONSTEXPRconstexpr long double
1819 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return __LDBL_MIN__3.36210314311209350626e-4932L; }
1820
1821 static _GLIBCXX_CONSTEXPRconstexpr long double
1822 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __LDBL_MAX__1.18973149535723176502e+4932L; }
1823
1824#if __cplusplus201703L >= 201103L
1825 static constexpr long double
1826 lowest() noexcept { return -__LDBL_MAX__1.18973149535723176502e+4932L; }
1827#endif
1828
1829 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = __LDBL_MANT_DIG__64;
1830 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10 = __LDBL_DIG__18;
1831#if __cplusplus201703L >= 201103L
1832 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_digits10
1833 = __glibcxx_max_digits10 (__LDBL_MANT_DIG__64);
1834#endif
1835 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = true;
1836 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = false;
1837 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = false;
1838 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = __FLT_RADIX__2;
1839
1840 static _GLIBCXX_CONSTEXPRconstexpr long double
1841 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return __LDBL_EPSILON__1.08420217248550443401e-19L; }
1842
1843 static _GLIBCXX_CONSTEXPRconstexpr long double
1844 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0.5L; }
1845
1846 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = __LDBL_MIN_EXP__(-16381);
1847 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = __LDBL_MIN_10_EXP__(-4931);
1848 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = __LDBL_MAX_EXP__16384;
1849 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = __LDBL_MAX_10_EXP__4932;
1850
1851 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = __LDBL_HAS_INFINITY__1;
1852 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = __LDBL_HAS_QUIET_NAN__1;
1853 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = has_quiet_NaN;
1854 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
1855 = bool(__LDBL_HAS_DENORM__1) ? denorm_present : denorm_absent;
1856 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss
1857 = __glibcxx_long_double_has_denorm_loss;
1858
1859 static _GLIBCXX_CONSTEXPRconstexpr long double
1860 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return __builtin_huge_vall(); }
1861
1862 static _GLIBCXX_CONSTEXPRconstexpr long double
1863 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return __builtin_nanl(""); }
1864
1865 static _GLIBCXX_CONSTEXPRconstexpr long double
1866 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return __builtin_nansl(""); }
1867
1868 static _GLIBCXX_CONSTEXPRconstexpr long double
1869 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept { return __LDBL_DENORM_MIN__3.64519953188247460253e-4951L; }
1870
1871 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559
1872 = has_infinity && has_quiet_NaN && has_denorm == denorm_present;
1873 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
1874 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = false;
1875
1876 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_long_double_traps;
1877 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before =
1878 __glibcxx_long_double_tinyness_before;
1879 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style =
1880 round_to_nearest;
1881 };
1882
1883#undef __glibcxx_long_double_has_denorm_loss
1884#undef __glibcxx_long_double_traps
1885#undef __glibcxx_long_double_tinyness_before
1886
1887_GLIBCXX_END_NAMESPACE_VERSION
1888} // namespace
1889
1890#undef __glibcxx_signed
1891#undef __glibcxx_min
1892#undef __glibcxx_max
1893#undef __glibcxx_digits
1894#undef __glibcxx_digits10
1895#undef __glibcxx_max_digits10
1896
1897#endif // _GLIBCXX_NUMERIC_LIMITS
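The analyzer step above shows numeric_limits<unsigned long>::max() returning
18446744073709551615, i.e. a 64-bit value with every bit set. Since a shift
whose count is greater than or equal to the width of the promoted left operand
is undefined behavior in C++, a count that can reach this value must be
range-checked before it is used. The following is a minimal, self-contained
sketch of such a guard; the function and variable names are purely
illustrative and are not taken from the LLVM source under analysis.

#include <cstdint>
#include <iostream>
#include <limits>

// Shift Value left by Count bits, but define the result as 0 when the count
// is >= the operand width, instead of invoking undefined behavior.
static uint64_t shiftLeftGuarded(uint64_t Value, uint64_t Count) {
  // std::numeric_limits<uint64_t>::digits is 64: the number of value bits.
  if (Count >= std::numeric_limits<uint64_t>::digits)
    return 0;              // a shift by >= 64 would be UB; define it as 0
  return Value << Count;   // safe: Count is now known to be in [0, 63]
}

int main() {
  std::cout << shiftLeftGuarded(1, 3) << '\n';   // prints 8
  std::cout << shiftLeftGuarded(1, std::numeric_limits<uint64_t>::max())
            << '\n';                             // prints 0 rather than UB
}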