Bug Summary

File: build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
Warning: line 780, column 36
The result of the left shift is undefined due to shifting by '18446744073709551615', which is greater or equal to the width of type 'uint64_t'
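
The shift count in this diagnostic, 18446744073709551615, is UINT64_MAX (2^64 - 1), which is what llvm::findFirstSet returns for a zero input under its default zero behavior (see the call path annotated below). In C++, a shift count must be smaller than the bit width of the promoted left operand, so shifting a uint64_t by 64 or more is undefined behavior. A minimal, self-contained C++ sketch of the language rule being enforced here (illustration only, not LLVM code):

#include <cstdint>
#include <cstdio>
#include <limits>

int main() {
  // The count reported by the analyzer: UINT64_MAX.
  uint64_t ShiftAmt = std::numeric_limits<uint64_t>::max();
  if (ShiftAmt >= 64) {
    // Performing the shift would be undefined behavior, so only report it.
    std::printf("shifting a uint64_t by %llu is undefined\n",
                (unsigned long long)ShiftAmt);
  } else {
    std::printf("1 << %llu = %llu\n", (unsigned long long)ShiftAmt,
                (unsigned long long)(uint64_t(1) << ShiftAmt));
  }
  return 0;
}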

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name SystemZISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-15/lib/clang/15.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/SystemZ -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/SystemZ -I include -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-15/lib/clang/15.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 
-o /tmp/scan-build-2022-04-20-140412-16051-1 -x c++ /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
15#include "SystemZConstantPoolValue.h"
16#include "SystemZMachineFunctionInfo.h"
17#include "SystemZTargetMachine.h"
18#include "llvm/CodeGen/CallingConvLower.h"
19#include "llvm/CodeGen/MachineInstrBuilder.h"
20#include "llvm/CodeGen/MachineRegisterInfo.h"
21#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
22#include "llvm/IR/IntrinsicInst.h"
23#include "llvm/IR/Intrinsics.h"
24#include "llvm/IR/IntrinsicsS390.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/KnownBits.h"
27#include <cctype>
28
29using namespace llvm;
30
31#define DEBUG_TYPE "systemz-lower"
32
33namespace {
34// Represents information about a comparison.
35struct Comparison {
36 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
37 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
38 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
39
40 // The operands to the comparison.
41 SDValue Op0, Op1;
42
43 // Chain if this is a strict floating-point comparison.
44 SDValue Chain;
45
46 // The opcode that should be used to compare Op0 and Op1.
47 unsigned Opcode;
48
49 // A SystemZICMP value. Only used for integer comparisons.
50 unsigned ICmpType;
51
52 // The mask of CC values that Opcode can produce.
53 unsigned CCValid;
54
55 // The mask of CC values for which the original condition is true.
56 unsigned CCMask;
57};
58} // end anonymous namespace
59
60// Classify VT as either 32 or 64 bit.
61static bool is32Bit(EVT VT) {
62 switch (VT.getSimpleVT().SimpleTy) {
63 case MVT::i32:
64 return true;
65 case MVT::i64:
66 return false;
67 default:
68 llvm_unreachable("Unsupported type");
69 }
70}
71
72// Return a version of MachineOperand that can be safely used before the
73// final use.
74static MachineOperand earlyUseOperand(MachineOperand Op) {
75 if (Op.isReg())
76 Op.setIsKill(false);
77 return Op;
78}
79
80SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
81 const SystemZSubtarget &STI)
82 : TargetLowering(TM), Subtarget(STI) {
83 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
84
85 auto *Regs = STI.getSpecialRegisters();
86
87 // Set up the register classes.
88 if (Subtarget.hasHighWord())
89 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
90 else
91 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
92 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
93 if (!useSoftFloat()) {
94 if (Subtarget.hasVector()) {
95 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
96 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
97 } else {
98 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
99 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
100 }
101 if (Subtarget.hasVectorEnhancements1())
102 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
103 else
104 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
105
106 if (Subtarget.hasVector()) {
107 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
108 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
109 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
110 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
111 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
112 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
113 }
114 }
115
116 // Compute derived properties from the register classes
117 computeRegisterProperties(Subtarget.getRegisterInfo());
118
119 // Set up special registers.
120 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
121
122 // TODO: It may be better to default to latency-oriented scheduling, however
123 // LLVM's current latency-oriented scheduler can't handle physreg definitions
124 // such as SystemZ has with CC, so set this to the register-pressure
125 // scheduler, because it can.
126 setSchedulingPreference(Sched::RegPressure);
127
128 setBooleanContents(ZeroOrOneBooleanContent);
129 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
130
131 // Instructions are strings of 2-byte aligned 2-byte values.
132 setMinFunctionAlignment(Align(2));
133 // For performance reasons we prefer 16-byte alignment.
134 setPrefFunctionAlignment(Align(16));
135
136 // Handle operations that are handled in a similar way for all types.
137 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
138 I <= MVT::LAST_FP_VALUETYPE;
139 ++I) {
140 MVT VT = MVT::SimpleValueType(I);
141 if (isTypeLegal(VT)) {
142 // Lower SET_CC into an IPM-based sequence.
143 setOperationAction(ISD::SETCC, VT, Custom);
144 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
145 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
146
147 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
148 setOperationAction(ISD::SELECT, VT, Expand);
149
150 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
151 setOperationAction(ISD::SELECT_CC, VT, Custom);
152 setOperationAction(ISD::BR_CC, VT, Custom);
153 }
154 }
155
156 // Expand jump table branches as address arithmetic followed by an
157 // indirect jump.
158 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
159
160 // Expand BRCOND into a BR_CC (see above).
161 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
162
163 // Handle integer types.
164 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
165 I <= MVT::LAST_INTEGER_VALUETYPE;
166 ++I) {
167 MVT VT = MVT::SimpleValueType(I);
168 if (isTypeLegal(VT)) {
169 setOperationAction(ISD::ABS, VT, Legal);
170
171 // Expand individual DIV and REMs into DIVREMs.
172 setOperationAction(ISD::SDIV, VT, Expand);
173 setOperationAction(ISD::UDIV, VT, Expand);
174 setOperationAction(ISD::SREM, VT, Expand);
175 setOperationAction(ISD::UREM, VT, Expand);
176 setOperationAction(ISD::SDIVREM, VT, Custom);
177 setOperationAction(ISD::UDIVREM, VT, Custom);
178
179 // Support addition/subtraction with overflow.
180 setOperationAction(ISD::SADDO, VT, Custom);
181 setOperationAction(ISD::SSUBO, VT, Custom);
182
183 // Support addition/subtraction with carry.
184 setOperationAction(ISD::UADDO, VT, Custom);
185 setOperationAction(ISD::USUBO, VT, Custom);
186
187 // Support carry in as value rather than glue.
188 setOperationAction(ISD::ADDCARRY, VT, Custom);
189 setOperationAction(ISD::SUBCARRY, VT, Custom);
190
191 // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
192 // stores, putting a serialization instruction after the stores.
193 setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
194 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
195
196 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
197 // available, or if the operand is constant.
198 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
199
200 // Use POPCNT on z196 and above.
201 if (Subtarget.hasPopulationCount())
202 setOperationAction(ISD::CTPOP, VT, Custom);
203 else
204 setOperationAction(ISD::CTPOP, VT, Expand);
205
206 // No special instructions for these.
207 setOperationAction(ISD::CTTZ, VT, Expand);
208 setOperationAction(ISD::ROTR, VT, Expand);
209
210 // Use *MUL_LOHI where possible instead of MULH*.
211 setOperationAction(ISD::MULHS, VT, Expand);
212 setOperationAction(ISD::MULHU, VT, Expand);
213 setOperationAction(ISD::SMUL_LOHI, VT, Custom);
214 setOperationAction(ISD::UMUL_LOHI, VT, Custom);
215
216 // Only z196 and above have native support for conversions to unsigned.
217 // On z10, promoting to i64 doesn't generate an inexact condition for
218 // values that are outside the i32 range but in the i64 range, so use
219 // the default expansion.
220 if (!Subtarget.hasFPExtension())
221 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
222
223 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
224 // default to Expand, so need to be modified to Legal where appropriate.
225 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
226 if (Subtarget.hasFPExtension())
227 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);
228
229 // And similarly for STRICT_[SU]INT_TO_FP.
230 setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal);
231 if (Subtarget.hasFPExtension())
232 setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal);
233 }
234 }
235
236 // Type legalization will convert 8- and 16-bit atomic operations into
237 // forms that operate on i32s (but still keeping the original memory VT).
238 // Lower them into full i32 operations.
239 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
240 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
241 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
242 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
243 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
244 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
245 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
246 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
247 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
248 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
249 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
250
251 // Even though i128 is not a legal type, we still need to custom lower
252 // the atomic operations in order to exploit SystemZ instructions.
253 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
254 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
255
256 // We can use the CC result of compare-and-swap to implement
257 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
258 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
259 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
260 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
261
262 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
263
264 // Traps are legal, as we will convert them to "j .+2".
265 setOperationAction(ISD::TRAP, MVT::Other, Legal);
266
267 // z10 has instructions for signed but not unsigned FP conversion.
268 // Handle unsigned 32-bit types as signed 64-bit types.
269 if (!Subtarget.hasFPExtension()) {
270 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
271 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
272 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote);
273 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
274 }
275
276 // We have native support for a 64-bit CTLZ, via FLOGR.
277 setOperationAction(ISD::CTLZ, MVT::i32, Promote);
278 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
279 setOperationAction(ISD::CTLZ, MVT::i64, Legal);
280
281 // On z15 we have native support for a 64-bit CTPOP.
282 if (Subtarget.hasMiscellaneousExtensions3()) {
283 setOperationAction(ISD::CTPOP, MVT::i32, Promote);
284 setOperationAction(ISD::CTPOP, MVT::i64, Legal);
285 }
286
287 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
288 setOperationAction(ISD::OR, MVT::i64, Custom);
289
290 // Expand 128 bit shifts without using a libcall.
291 setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
292 setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
293 setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
294 setLibcallName(RTLIB::SRL_I128, nullptr);
295 setLibcallName(RTLIB::SHL_I128, nullptr);
296 setLibcallName(RTLIB::SRA_I128, nullptr);
297
298 // Handle bitcast from fp128 to i128.
299 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
300
301 // We have native instructions for i8, i16 and i32 extensions, but not i1.
302 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
303 for (MVT VT : MVT::integer_valuetypes()) {
304 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
305 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
306 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
307 }
308
309 // Handle the various types of symbolic address.
310 setOperationAction(ISD::ConstantPool, PtrVT, Custom);
311 setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
312 setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
313 setOperationAction(ISD::BlockAddress, PtrVT, Custom);
314 setOperationAction(ISD::JumpTable, PtrVT, Custom);
315
316 // We need to handle dynamic allocations specially because of the
317 // 160-byte area at the bottom of the stack.
318 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
319 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
320
321 setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
322 setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
323
324 // Handle prefetches with PFD or PFDRL.
325 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
326
327 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
328 // Assume by default that all vector operations need to be expanded.
329 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
330 if (getOperationAction(Opcode, VT) == Legal)
331 setOperationAction(Opcode, VT, Expand);
332
333 // Likewise all truncating stores and extending loads.
334 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
335 setTruncStoreAction(VT, InnerVT, Expand);
336 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
337 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
338 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
339 }
340
341 if (isTypeLegal(VT)) {
342 // These operations are legal for anything that can be stored in a
343 // vector register, even if there is no native support for the format
344 // as such. In particular, we can do these for v4f32 even though there
345 // are no specific instructions for that format.
346 setOperationAction(ISD::LOAD, VT, Legal);
347 setOperationAction(ISD::STORE, VT, Legal);
348 setOperationAction(ISD::VSELECT, VT, Legal);
349 setOperationAction(ISD::BITCAST, VT, Legal);
350 setOperationAction(ISD::UNDEF, VT, Legal);
351
352 // Likewise, except that we need to replace the nodes with something
353 // more specific.
354 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
355 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
356 }
357 }
358
359 // Handle integer vector types.
360 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
361 if (isTypeLegal(VT)) {
362 // These operations have direct equivalents.
363 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
364 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
365 setOperationAction(ISD::ADD, VT, Legal);
366 setOperationAction(ISD::SUB, VT, Legal);
367 if (VT != MVT::v2i64)
368 setOperationAction(ISD::MUL, VT, Legal);
369 setOperationAction(ISD::ABS, VT, Legal);
370 setOperationAction(ISD::AND, VT, Legal);
371 setOperationAction(ISD::OR, VT, Legal);
372 setOperationAction(ISD::XOR, VT, Legal);
373 if (Subtarget.hasVectorEnhancements1())
374 setOperationAction(ISD::CTPOP, VT, Legal);
375 else
376 setOperationAction(ISD::CTPOP, VT, Custom);
377 setOperationAction(ISD::CTTZ, VT, Legal);
378 setOperationAction(ISD::CTLZ, VT, Legal);
379
380 // Convert a GPR scalar to a vector by inserting it into element 0.
381 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
382
383 // Use a series of unpacks for extensions.
384 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
385 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
386
387 // Detect shifts by a scalar amount and convert them into
388 // V*_BY_SCALAR.
389 setOperationAction(ISD::SHL, VT, Custom);
390 setOperationAction(ISD::SRA, VT, Custom);
391 setOperationAction(ISD::SRL, VT, Custom);
392
393 // At present ROTL isn't matched by DAGCombiner. ROTR should be
394 // converted into ROTL.
395 setOperationAction(ISD::ROTL, VT, Expand);
396 setOperationAction(ISD::ROTR, VT, Expand);
397
398 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
399 // and inverting the result as necessary.
400 setOperationAction(ISD::SETCC, VT, Custom);
401 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
402 if (Subtarget.hasVectorEnhancements1())
403 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
404 }
405 }
406
407 if (Subtarget.hasVector()) {
408 // There should be no need to check for float types other than v2f64
409 // since <2 x f32> isn't a legal type.
410 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
411 setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
412 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
413 setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
414 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
415 setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
416 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
417 setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
418
419 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
420 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
421 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
422 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
423 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
424 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal);
425 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
426 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal);
427 }
428
429 if (Subtarget.hasVectorEnhancements2()) {
430 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
431 setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
432 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
433 setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
434 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
435 setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
436 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
437 setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);
438
439 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
440 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
441 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
442 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
443 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
444 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal);
445 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
446 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal);
447 }
448
449 // Handle floating-point types.
450 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
451 I <= MVT::LAST_FP_VALUETYPE;
452 ++I) {
453 MVT VT = MVT::SimpleValueType(I);
454 if (isTypeLegal(VT)) {
455 // We can use FI for FRINT.
456 setOperationAction(ISD::FRINT, VT, Legal);
457
458 // We can use the extended form of FI for other rounding operations.
459 if (Subtarget.hasFPExtension()) {
460 setOperationAction(ISD::FNEARBYINT, VT, Legal);
461 setOperationAction(ISD::FFLOOR, VT, Legal);
462 setOperationAction(ISD::FCEIL, VT, Legal);
463 setOperationAction(ISD::FTRUNC, VT, Legal);
464 setOperationAction(ISD::FROUND, VT, Legal);
465 }
466
467 // No special instructions for these.
468 setOperationAction(ISD::FSIN, VT, Expand);
469 setOperationAction(ISD::FCOS, VT, Expand);
470 setOperationAction(ISD::FSINCOS, VT, Expand);
471 setOperationAction(ISD::FREM, VT, Expand);
472 setOperationAction(ISD::FPOW, VT, Expand);
473
474 // Handle constrained floating-point operations.
475 setOperationAction(ISD::STRICT_FADD, VT, Legal);
476 setOperationAction(ISD::STRICT_FSUB, VT, Legal);
477 setOperationAction(ISD::STRICT_FMUL, VT, Legal);
478 setOperationAction(ISD::STRICT_FDIV, VT, Legal);
479 setOperationAction(ISD::STRICT_FMA, VT, Legal);
480 setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
481 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
482 setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
483 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
484 if (Subtarget.hasFPExtension()) {
485 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
486 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
487 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
488 setOperationAction(ISD::STRICT_FROUND, VT, Legal);
489 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
490 }
491 }
492 }
493
494 // Handle floating-point vector types.
495 if (Subtarget.hasVector()) {
496 // Scalar-to-vector conversion is just a subreg.
497 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
498 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
499
500 // Some insertions and extractions can be done directly but others
501 // need to go via integers.
502 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
503 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
504 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
505 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
506
507 // These operations have direct equivalents.
508 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
509 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
510 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
511 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
512 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
513 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
514 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
515 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
516 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
517 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
518 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
519 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
520 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
521 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
522
523 // Handle constrained floating-point operations.
524 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
525 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
526 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
527 setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
528 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
529 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
530 setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
531 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
532 setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
533 setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
534 setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
535 setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
536 }
537
538 // The vector enhancements facility 1 has instructions for these.
539 if (Subtarget.hasVectorEnhancements1()) {
540 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
541 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
542 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
543 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
544 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
545 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
546 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
547 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
548 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
549 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
550 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
551 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
552 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
553 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
554
555 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
556 setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
557 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
558 setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);
559
560 setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
561 setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
562 setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
563 setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);
564
565 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
566 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
567 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
568 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
569
570 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
571 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
572 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
573 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
574
575 setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
576 setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
577 setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
578 setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
579
580 // Handle constrained floating-point operations.
581 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
582 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
583 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
584 setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
585 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
586 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
587 setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
588 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
589 setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
590 setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
591 setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
592 setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
593 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
594 MVT::v4f32, MVT::v2f64 }) {
595 setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
596 setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
597 setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal);
598 setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal);
599 }
600 }
601
602 // We only have fused f128 multiply-addition on vector registers.
603 if (!Subtarget.hasVectorEnhancements1()) {
604 setOperationAction(ISD::FMA, MVT::f128, Expand);
605 setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
606 }
607
608 // We don't have a copysign instruction on vector registers.
609 if (Subtarget.hasVectorEnhancements1())
610 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
611
612 // Needed so that we don't try to implement f128 constant loads using
613 // a load-and-extend of a f80 constant (in cases where the constant
614 // would fit in an f80).
615 for (MVT VT : MVT::fp_valuetypes())
616 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
617
618 // We don't have extending load instruction on vector registers.
619 if (Subtarget.hasVectorEnhancements1()) {
620 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
621 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
622 }
623
624 // Floating-point truncation and stores need to be done separately.
625 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
626 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
627 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
628
629 // We have 64-bit FPR<->GPR moves, but need special handling for
630 // 32-bit forms.
631 if (!Subtarget.hasVector()) {
632 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
633 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
634 }
635
636 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
637 // structure, but VAEND is a no-op.
638 setOperationAction(ISD::VASTART, MVT::Other, Custom);
639 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
640 setOperationAction(ISD::VAEND, MVT::Other, Expand);
641
642 // Codes for which we want to perform some z-specific combinations.
643 setTargetDAGCombine({ISD::ZERO_EXTEND,
644 ISD::SIGN_EXTEND,
645 ISD::SIGN_EXTEND_INREG,
646 ISD::LOAD,
647 ISD::STORE,
648 ISD::VECTOR_SHUFFLE,
649 ISD::EXTRACT_VECTOR_ELT,
650 ISD::FP_ROUND,
651 ISD::STRICT_FP_ROUND,
652 ISD::FP_EXTEND,
653 ISD::SINT_TO_FP,
654 ISD::UINT_TO_FP,
655 ISD::STRICT_FP_EXTEND,
656 ISD::BSWAP,
657 ISD::SDIV,
658 ISD::UDIV,
659 ISD::SREM,
660 ISD::UREM,
661 ISD::INTRINSIC_VOID,
662 ISD::INTRINSIC_W_CHAIN});
663
664 // Handle intrinsics.
665 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
666 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
667
668 // We want to use MVC in preference to even a single load/store pair.
669 MaxStoresPerMemcpy = 0;
670 MaxStoresPerMemcpyOptSize = 0;
671
672 // The main memset sequence is a byte store followed by an MVC.
673 // Two STC or MV..I stores win over that, but the kind of fused stores
674 // generated by target-independent code don't when the byte value is
675 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
676 // than "STC;MVC". Handle the choice in target-specific code instead.
677 MaxStoresPerMemset = 0;
678 MaxStoresPerMemsetOptSize = 0;
679
680 // Default to having -disable-strictnode-mutation on
681 IsStrictFPEnabled = true;
682}
683
684bool SystemZTargetLowering::useSoftFloat() const {
685 return Subtarget.hasSoftFloat();
686}
687
688EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
689 LLVMContext &, EVT VT) const {
690 if (!VT.isVector())
691 return MVT::i32;
692 return VT.changeVectorElementTypeToInteger();
693}
694
695bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
696 const MachineFunction &MF, EVT VT) const {
697 VT = VT.getScalarType();
698
699 if (!VT.isSimple())
700 return false;
701
702 switch (VT.getSimpleVT().SimpleTy) {
703 case MVT::f32:
704 case MVT::f64:
705 return true;
706 case MVT::f128:
707 return Subtarget.hasVectorEnhancements1();
708 default:
709 break;
710 }
711
712 return false;
713}
714
715// Return true if the constant can be generated with a vector instruction,
716// such as VGM, VGMB or VREPI.
717bool SystemZVectorConstantInfo::isVectorConstantLegal(
718 const SystemZSubtarget &Subtarget) {
719 const SystemZInstrInfo *TII =
720 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
721 if (!Subtarget.hasVector() ||
  3. Assuming the condition is false
722 (isFP128 && !Subtarget.hasVectorEnhancements1()))
  4. Assuming field 'isFP128' is false
723 return false;
724
725 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
726 // preferred way of creating all-zero and all-one vectors so give it
727 // priority over other methods below.
728 unsigned Mask = 0;
729 unsigned I = 0;
730 for (; I < SystemZ::VectorBytes; ++I) {
  5. Loop condition is true. Entering loop body
731 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
732 if (Byte == 0xff)
  6. Assuming 'Byte' is not equal to 255
  7. Taking false branch
733 Mask |= 1ULL << I;
734 else if (Byte != 0)
  8. Assuming 'Byte' is not equal to 0
  9. Taking true branch
735 break;
736 }
737 if (I == SystemZ::VectorBytes) {
  10. Execution continues on line 737
  10.1. 'I' is not equal to 'VectorBytes'
  11. Taking false branch
738 Opcode = SystemZISD::BYTE_MASK;
739 OpVals.push_back(Mask);
740 VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
741 return true;
742 }
743
744 if (SplatBitSize > 64)
  11.1. Field 'SplatBitSize' is <= 64
  12. Taking false branch
745 return false;
746
747 auto tryValue = [&](uint64_t Value) -> bool {
748 // Try VECTOR REPLICATE IMMEDIATE
749 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
750 if (isInt<16>(SignedValue)) {
751 OpVals.push_back(((unsigned) SignedValue));
752 Opcode = SystemZISD::REPLICATE;
753 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
754 SystemZ::VectorBits / SplatBitSize);
755 return true;
756 }
757 // Try VECTOR GENERATE MASK
758 unsigned Start, End;
759 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
760 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
761 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
762 // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
763 OpVals.push_back(Start - (64 - SplatBitSize));
764 OpVals.push_back(End - (64 - SplatBitSize));
765 Opcode = SystemZISD::ROTATE_MASK;
766 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
767 SystemZ::VectorBits / SplatBitSize);
768 return true;
769 }
770 return false;
771 };
772
773 // First try assuming that any undefined bits above the highest set bit
774 // and below the lowest set bit are 1s. This increases the likelihood of
775 // being able to use a sign-extended element value in VECTOR REPLICATE
776 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
777 uint64_t SplatBitsZ = SplatBits.getZExtValue();
778 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
779 uint64_t Lower =
780 (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
  13. Calling 'findFirstSet<unsigned long>'
  20. Returning from 'findFirstSet<unsigned long>'
  21. The result of the left shift is undefined due to shifting by '18446744073709551615', which is greater or equal to the width of type 'uint64_t'
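
On the path the analyzer constructs, SplatBitsZ is zero when line 780 is reached, so findFirstSet reports "no bit set" by returning the maximum value of uint64_t (step 21 shows the resulting count), and the shift at line 780 then uses an out-of-range count. Below is a self-contained sketch of one possible guard: only build the Lower/Upper masks when at least one defined bit is set. The firstSet/lastSet helpers are hypothetical stand-ins for the LLVM utilities, and this is an illustration, not necessarily the fix adopted upstream.

#include <cstdint>
#include <cstdio>
#include <limits>

// Hypothetical stand-ins mimicking llvm::findFirstSet / findLastSet on
// uint64_t, including the "return max() on zero input" behavior the
// analyzer describes.
static uint64_t firstSet(uint64_t V) {
  if (V == 0) return std::numeric_limits<uint64_t>::max();
  uint64_t I = 0;
  while ((V & 1) == 0) { V >>= 1; ++I; }
  return I;
}
static uint64_t lastSet(uint64_t V) {
  if (V == 0) return std::numeric_limits<uint64_t>::max();
  uint64_t I = 0;
  while (V >>= 1) ++I;
  return I;
}

// Only form the masks when SplatBitsZ has a set bit, so every shift count
// stays below 64. Variable names mirror the listing above for readability.
static void buildMasks(uint64_t SplatBitsZ, uint64_t SplatUndefZ,
                       uint64_t &Lower, uint64_t &Upper) {
  Lower = 0;
  Upper = 0;
  if (SplatBitsZ != 0) {
    Lower = SplatUndefZ & ((uint64_t(1) << firstSet(SplatBitsZ)) - 1);
    Upper = SplatUndefZ & ~((uint64_t(1) << lastSet(SplatBitsZ)) - 1);
  }
}

int main() {
  uint64_t Lower, Upper;
  buildMasks(/*SplatBitsZ=*/0, /*SplatUndefZ=*/~uint64_t(0), Lower, Upper);
  std::printf("Lower=0x%llx Upper=0x%llx\n",
              (unsigned long long)Lower, (unsigned long long)Upper);
  return 0;
}
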
781 uint64_t Upper =
782 (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
783 if (tryValue(SplatBitsZ | Upper | Lower))
784 return true;
785
786 // Now try assuming that any undefined bits between the first and
787 // last defined set bits are set. This increases the chances of
788 // using a non-wraparound mask.
789 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
790 return tryValue(SplatBitsZ | Middle);
791}
792
793SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
794 IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
795 isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
796 SplatBits = FPImm.bitcastToAPInt();
797 unsigned Width = SplatBits.getBitWidth();
798 IntBits <<= (SystemZ::VectorBits - Width);
799
800 // Find the smallest splat.
801 while (Width > 8) {
802 unsigned HalfSize = Width / 2;
803 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
804 APInt LowValue = SplatBits.trunc(HalfSize);
805
806 // If the two halves do not match, stop here.
807 if (HighValue != LowValue || 8 > HalfSize)
808 break;
809
810 SplatBits = HighValue;
811 Width = HalfSize;
812 }
813 SplatUndef = 0;
814 SplatBitSize = Width;
815}
816
817SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
818 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
819 bool HasAnyUndefs;
820
821 // Get IntBits by finding the 128 bit splat.
822 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
823 true);
824
825 // Get SplatBits by finding the 8 bit or greater splat.
826 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
827 true);
828}
829
830bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
831 bool ForCodeSize) const {
832 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
833 if (Imm.isZero() || Imm.isNegZero())
  1. Taking false branch
834 return true;
835
836 return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
  2. Calling 'SystemZVectorConstantInfo::isVectorConstantLegal'
837}
838
839/// Returns true if stack probing through inline assembly is requested.
840bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
841 // If the function specifically requests inline stack probes, emit them.
842 if (MF.getFunction().hasFnAttribute("probe-stack"))
843 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
844 "inline-asm";
845 return false;
846}
847
848bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
849 // We can use CGFI or CLGFI.
850 return isInt<32>(Imm) || isUInt<32>(Imm);
851}
852
853bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
854 // We can use ALGFI or SLGFI.
855 return isUInt<32>(Imm) || isUInt<32>(-Imm);
856}
857
858bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
859 EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {
860 // Unaligned accesses should never be slower than the expanded version.
861 // We check specifically for aligned accesses in the few cases where
862 // they are required.
863 if (Fast)
864 *Fast = true;
865 return true;
866}
867
868// Information about the addressing mode for a memory access.
869struct AddressingMode {
870 // True if a long displacement is supported.
871 bool LongDisplacement;
872
873 // True if use of index register is supported.
874 bool IndexReg;
875
876 AddressingMode(bool LongDispl, bool IdxReg) :
877 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
878};
879
880// Return the desired addressing mode for a Load which has only one use (in
881// the same block) which is a Store.
882static AddressingMode getLoadStoreAddrMode(bool HasVector,
883 Type *Ty) {
884 // With vector support a Load->Store combination may be combined to either
885 // an MVC or vector operations and it seems to work best to allow the
886 // vector addressing mode.
887 if (HasVector)
888 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
889
890 // Otherwise only the MVC case is special.
891 bool MVC = Ty->isIntegerTy(8);
892 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
893}
894
895// Return the addressing mode which seems most desirable given an LLVM
896// Instruction pointer.
897static AddressingMode
898supportedAddressingMode(Instruction *I, bool HasVector) {
899 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
900 switch (II->getIntrinsicID()) {
901 default: break;
902 case Intrinsic::memset:
903 case Intrinsic::memmove:
904 case Intrinsic::memcpy:
905 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
906 }
907 }
908
909 if (isa<LoadInst>(I) && I->hasOneUse()) {
910 auto *SingleUser = cast<Instruction>(*I->user_begin());
911 if (SingleUser->getParent() == I->getParent()) {
912 if (isa<ICmpInst>(SingleUser)) {
913 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
914 if (C->getBitWidth() <= 64 &&
915 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
916 // Comparison of memory with 16 bit signed / unsigned immediate
917 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
918 } else if (isa<StoreInst>(SingleUser))
919 // Load->Store
920 return getLoadStoreAddrMode(HasVector, I->getType());
921 }
922 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
923 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
924 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
925 // Load->Store
926 return getLoadStoreAddrMode(HasVector, LoadI->getType());
927 }
928
929 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
930
931 // * Use LDE instead of LE/LEY for z13 to avoid partial register
932 // dependencies (LDE only supports small offsets).
933 // * Utilize the vector registers to hold floating point
934 // values (vector load / store instructions only support small
935 // offsets).
936
937 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
938 I->getOperand(0)->getType());
939 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
940 bool IsVectorAccess = MemAccessTy->isVectorTy();
941
942 // A store of an extracted vector element will be combined into a VSTE type
943 // instruction.
944 if (!IsVectorAccess && isa<StoreInst>(I)) {
945 Value *DataOp = I->getOperand(0);
946 if (isa<ExtractElementInst>(DataOp))
947 IsVectorAccess = true;
948 }
949
950 // A load which gets inserted into a vector element will be combined into a
951 // VLE type instruction.
952 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
953 User *LoadUser = *I->user_begin();
954 if (isa<InsertElementInst>(LoadUser))
955 IsVectorAccess = true;
956 }
957
958 if (IsFPAccess || IsVectorAccess)
959 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
960 }
961
962 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
963}
964
965bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
966 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
967 // Punt on globals for now, although they can be used in limited
968 // RELATIVE LONG cases.
969 if (AM.BaseGV)
970 return false;
971
972 // Require a 20-bit signed offset.
973 if (!isInt<20>(AM.BaseOffs))
974 return false;
975
976 AddressingMode SupportedAM(true, true);
977 if (I != nullptr)
978 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
979
980 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
981 return false;
982
983 if (!SupportedAM.IndexReg)
984 // No indexing allowed.
985 return AM.Scale == 0;
986 else
987 // Indexing is OK but no scale factor can be applied.
988 return AM.Scale == 0 || AM.Scale == 1;
989}
990
991bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
992 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
993 return false;
994 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedSize();
995 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedSize();
996 return FromBits > ToBits;
997}
998
999bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
1000 if (!FromVT.isInteger() || !ToVT.isInteger())
1001 return false;
1002 unsigned FromBits = FromVT.getFixedSizeInBits();
1003 unsigned ToBits = ToVT.getFixedSizeInBits();
1004 return FromBits > ToBits;
1005}
1006
1007//===----------------------------------------------------------------------===//
1008// Inline asm support
1009//===----------------------------------------------------------------------===//
1010
1011TargetLowering::ConstraintType
1012SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
1013 if (Constraint.size() == 1) {
1014 switch (Constraint[0]) {
1015 case 'a': // Address register
1016 case 'd': // Data register (equivalent to 'r')
1017 case 'f': // Floating-point register
1018 case 'h': // High-part register
1019 case 'r': // General-purpose register
1020 case 'v': // Vector register
1021 return C_RegisterClass;
1022
1023 case 'Q': // Memory with base and unsigned 12-bit displacement
1024 case 'R': // Likewise, plus an index
1025 case 'S': // Memory with base and signed 20-bit displacement
1026 case 'T': // Likewise, plus an index
1027 case 'm': // Equivalent to 'T'.
1028 return C_Memory;
1029
1030 case 'I': // Unsigned 8-bit constant
1031 case 'J': // Unsigned 12-bit constant
1032 case 'K': // Signed 16-bit constant
1033 case 'L': // Signed 20-bit displacement (on all targets we support)
1034 case 'M': // 0x7fffffff
1035 return C_Immediate;
1036
1037 default:
1038 break;
1039 }
1040 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1041 switch (Constraint[1]) {
1042 case 'Q': // Address with base and unsigned 12-bit displacement
1043 case 'R': // Likewise, plus an index
1044 case 'S': // Address with base and signed 20-bit displacement
1045 case 'T': // Likewise, plus an index
1046 return C_Address;
1047
1048 default:
1049 break;
1050 }
1051 }
1052 return TargetLowering::getConstraintType(Constraint);
1053}
1054
1055TargetLowering::ConstraintWeight SystemZTargetLowering::
1056getSingleConstraintMatchWeight(AsmOperandInfo &info,
1057 const char *constraint) const {
1058 ConstraintWeight weight = CW_Invalid;
1059 Value *CallOperandVal = info.CallOperandVal;
1060 // If we don't have a value, we can't do a match,
1061 // but allow it at the lowest weight.
1062 if (!CallOperandVal)
1063 return CW_Default;
1064 Type *type = CallOperandVal->getType();
1065 // Look at the constraint type.
1066 switch (*constraint) {
1067 default:
1068 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
1069 break;
1070
1071 case 'a': // Address register
1072 case 'd': // Data register (equivalent to 'r')
1073 case 'h': // High-part register
1074 case 'r': // General-purpose register
1075 if (CallOperandVal->getType()->isIntegerTy())
1076 weight = CW_Register;
1077 break;
1078
1079 case 'f': // Floating-point register
1080 if (type->isFloatingPointTy())
1081 weight = CW_Register;
1082 break;
1083
1084 case 'v': // Vector register
1085 if ((type->isVectorTy() || type->isFloatingPointTy()) &&
1086 Subtarget.hasVector())
1087 weight = CW_Register;
1088 break;
1089
1090 case 'I': // Unsigned 8-bit constant
1091 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1092 if (isUInt<8>(C->getZExtValue()))
1093 weight = CW_Constant;
1094 break;
1095
1096 case 'J': // Unsigned 12-bit constant
1097 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1098 if (isUInt<12>(C->getZExtValue()))
1099 weight = CW_Constant;
1100 break;
1101
1102 case 'K': // Signed 16-bit constant
1103 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1104 if (isInt<16>(C->getSExtValue()))
1105 weight = CW_Constant;
1106 break;
1107
1108 case 'L': // Signed 20-bit displacement (on all targets we support)
1109 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1110 if (isInt<20>(C->getSExtValue()))
1111 weight = CW_Constant;
1112 break;
1113
1114 case 'M': // 0x7fffffff
1115 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1116 if (C->getZExtValue() == 0x7fffffff)
1117 weight = CW_Constant;
1118 break;
1119 }
1120 return weight;
1121}
1122
1123// Parse a "{tNNN}" register constraint for which the register type "t"
1124// has already been verified. MC is the class associated with "t" and
1125// Map maps 0-based register numbers to LLVM register numbers.
1126static std::pair<unsigned, const TargetRegisterClass *>
1127parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
1128 const unsigned *Map, unsigned Size) {
1129 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1130 if (isdigit(Constraint[2])) {
1131 unsigned Index;
1132 bool Failed =
1133 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1134 if (!Failed && Index < Size && Map[Index])
1135 return std::make_pair(Map[Index], RC);
1136 }
1137 return std::make_pair(0U, nullptr);
1138}
1139
1140std::pair<unsigned, const TargetRegisterClass *>
1141SystemZTargetLowering::getRegForInlineAsmConstraint(
1142 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1143 if (Constraint.size() == 1) {
1144 // GCC Constraint Letters
1145 switch (Constraint[0]) {
1146 default: break;
1147 case 'd': // Data register (equivalent to 'r')
1148 case 'r': // General-purpose register
1149 if (VT == MVT::i64)
1150 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1151 else if (VT == MVT::i128)
1152 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1153 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1154
1155 case 'a': // Address register
1156 if (VT == MVT::i64)
1157 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1158 else if (VT == MVT::i128)
1159 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1160 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1161
1162 case 'h': // High-part register (an LLVM extension)
1163 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1164
1165 case 'f': // Floating-point register
1166 if (!useSoftFloat()) {
1167 if (VT == MVT::f64)
1168 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1169 else if (VT == MVT::f128)
1170 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1171 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1172 }
1173 break;
1174 case 'v': // Vector register
1175 if (Subtarget.hasVector()) {
1176 if (VT == MVT::f32)
1177 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1178 if (VT == MVT::f64)
1179 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1180 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1181 }
1182 break;
1183 }
1184 }
1185 if (Constraint.size() > 0 && Constraint[0] == '{') {
1186 // We need to override the default register parsing for GPRs and FPRs
1187 // because the interpretation depends on VT. The internal names of
1188 // the registers are also different from the external names
1189 // (F0D and F0S instead of F0, etc.).
1190 if (Constraint[1] == 'r') {
1191 if (VT == MVT::i32)
1192 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1193 SystemZMC::GR32Regs, 16);
1194 if (VT == MVT::i128)
1195 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1196 SystemZMC::GR128Regs, 16);
1197 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1198 SystemZMC::GR64Regs, 16);
1199 }
1200 if (Constraint[1] == 'f') {
1201 if (useSoftFloat())
1202 return std::make_pair(
1203 0u, static_cast<const TargetRegisterClass *>(nullptr));
1204 if (VT == MVT::f32)
1205 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1206 SystemZMC::FP32Regs, 16);
1207 if (VT == MVT::f128)
1208 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1209 SystemZMC::FP128Regs, 16);
1210 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1211 SystemZMC::FP64Regs, 16);
1212 }
1213 if (Constraint[1] == 'v') {
1214 if (!Subtarget.hasVector())
1215 return std::make_pair(
1216 0u, static_cast<const TargetRegisterClass *>(nullptr));
1217 if (VT == MVT::f32)
1218 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1219 SystemZMC::VR32Regs, 32);
1220 if (VT == MVT::f64)
1221 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1222 SystemZMC::VR64Regs, 32);
1223 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1224 SystemZMC::VR128Regs, 32);
1225 }
1226 }
1227 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1228}
1229
1230// FIXME? Maybe this could be a TableGen attribute on some registers and
1231// this table could be generated automatically from RegInfo.
1232Register SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
1233 const MachineFunction &MF) const {
1234
1235 Register Reg = StringSwitch<Register>(RegName)
1236 .Case("r15", SystemZ::R15D)
1237 .Default(0);
1238 if (Reg)
1239 return Reg;
1240 report_fatal_error("Invalid register name global variable");
1241}
1242
1243void SystemZTargetLowering::
1244LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1245 std::vector<SDValue> &Ops,
1246 SelectionDAG &DAG) const {
1247 // Only support length 1 constraints for now.
1248 if (Constraint.length() == 1) {
1249 switch (Constraint[0]) {
1250 case 'I': // Unsigned 8-bit constant
1251 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1252 if (isUInt<8>(C->getZExtValue()))
1253 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1254 Op.getValueType()));
1255 return;
1256
1257 case 'J': // Unsigned 12-bit constant
1258 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1259 if (isUInt<12>(C->getZExtValue()))
1260 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1261 Op.getValueType()));
1262 return;
1263
1264 case 'K': // Signed 16-bit constant
1265 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1266 if (isInt<16>(C->getSExtValue()))
1267 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1268 Op.getValueType()));
1269 return;
1270
1271 case 'L': // Signed 20-bit displacement (on all targets we support)
1272 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1273 if (isInt<20>(C->getSExtValue()))
1274 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1275 Op.getValueType()));
1276 return;
1277
1278 case 'M': // 0x7fffffff
1279 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1280 if (C->getZExtValue() == 0x7fffffff)
1281 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1282 Op.getValueType()));
1283 return;
1284 }
1285 }
1286 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1287}
1288
1289//===----------------------------------------------------------------------===//
1290// Calling conventions
1291//===----------------------------------------------------------------------===//
1292
1293#include "SystemZGenCallingConv.inc"
1294
1295const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
1296 CallingConv::ID) const {
1297 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1298 SystemZ::R14D, 0 };
1299 return ScratchRegs;
1300}
1301
1302bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
1303 Type *ToType) const {
1304 return isTruncateFree(FromType, ToType);
1305}
1306
1307bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1308 return CI->isTailCall();
1309}
1310
1311// We do not yet support 128-bit single-element vector types. If the user
1312 // attempts to use such types as a function argument or return type, prefer
1313 // to error out instead of emitting code that violates the ABI.
1314static void VerifyVectorType(MVT VT, EVT ArgVT) {
1315 if (ArgVT.isVector() && !VT.isVector())
1316 report_fatal_error("Unsupported vector argument or return type");
1317}
1318
1319static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
1320 for (unsigned i = 0; i < Ins.size(); ++i)
1321 VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1322}
1323
1324static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
1325 for (unsigned i = 0; i < Outs.size(); ++i)
1326 VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1327}
1328
1329// Value is a value that has been passed to us in the location described by VA
1330// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1331// any loads onto Chain.
1332static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
1333 CCValAssign &VA, SDValue Chain,
1334 SDValue Value) {
1335 // If the argument has been promoted from a smaller type, insert an
1336 // assertion to capture this.
1337 if (VA.getLocInfo() == CCValAssign::SExt)
1338 Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1339 DAG.getValueType(VA.getValVT()));
1340 else if (VA.getLocInfo() == CCValAssign::ZExt)
1341 Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1342 DAG.getValueType(VA.getValVT()));
1343
1344 if (VA.isExtInLoc())
1345 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1346 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1347 // If this is a short vector argument loaded from the stack,
1348 // extend from i64 to full vector size and then bitcast.
1349 assert(VA.getLocVT() == MVT::i64);
1350 assert(VA.getValVT().isVector());
1351 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1352 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1353 } else
1354 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1355 return Value;
1356}
1357
1358// Value is a value of type VA.getValVT() that we need to copy into
1359// the location described by VA. Return a copy of Value converted to
1360 // VA.getLocVT(). The caller is responsible for handling indirect values.
1361static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
1362 CCValAssign &VA, SDValue Value) {
1363 switch (VA.getLocInfo()) {
1364 case CCValAssign::SExt:
1365 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1366 case CCValAssign::ZExt:
1367 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1368 case CCValAssign::AExt:
1369 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1370 case CCValAssign::BCvt: {
1371 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1372 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f64 ||
1373 VA.getValVT() == MVT::f128);
1374 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1375 ? MVT::v2i64
1376 : VA.getLocVT();
1377 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1378 // For ELF, this is a short vector argument to be stored to the stack,
1379 // bitcast to v2i64 and then extract first element.
1380 if (BitCastToType == MVT::v2i64)
1381 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1382 DAG.getConstant(0, DL, MVT::i32));
1383 return Value;
1384 }
1385 case CCValAssign::Full:
1386 return Value;
1387 default:
1388 llvm_unreachable("Unhandled getLocInfo()");
1389 }
1390}
1391
1392static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
1393 SDLoc DL(In);
1394 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
1395 DAG.getIntPtrConstant(0, DL));
1396 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
1397 DAG.getIntPtrConstant(1, DL));
1398 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1399 MVT::Untyped, Hi, Lo);
1400 return SDValue(Pair, 0);
1401}
1402
1403static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
1404 SDLoc DL(In);
1405 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1406 DL, MVT::i64, In);
1407 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1408 DL, MVT::i64, In);
1409 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1410}
1411
1412bool SystemZTargetLowering::splitValueIntoRegisterParts(
1413 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1414 unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
1415 EVT ValueVT = Val.getValueType();
1416 assert((ValueVT != MVT::i128 ||
1417 ((NumParts == 1 && PartVT == MVT::Untyped) ||
1418 (NumParts == 2 && PartVT == MVT::i64))) &&
1419 "Unknown handling of i128 value.");
1420 if (ValueVT == MVT::i128 && NumParts == 1) {
1421 // Inline assembly operand.
1422 Parts[0] = lowerI128ToGR128(DAG, Val);
1423 return true;
1424 }
1425 return false;
1426}
1427
1428SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
1429 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1430 MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
1431 assert((ValueVT != MVT::i128 ||
1432 ((NumParts == 1 && PartVT == MVT::Untyped) ||
1433 (NumParts == 2 && PartVT == MVT::i64))) &&
1434 "Unknown handling of i128 value.");
1435 if (ValueVT == MVT::i128 && NumParts == 1)
1436 // Inline assembly operand.
1437 return lowerGR128ToI128(DAG, Parts[0]);
1438 return SDValue();
1439}
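// Together, splitValueIntoRegisterParts and joinRegisterPartsIntoValue let an
// i128 inline asm operand travel as a single MVT::Untyped GR128 part: the
// split side packs the two i64 halves with PAIR128, and the join side pulls
// them back out via subreg_h64/subreg_l64 and rebuilds the i128 with
// BUILD_PAIR. All other cases return false / an empty SDValue so the generic
// two-part i64 handling applies.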
1440
1441SDValue SystemZTargetLowering::LowerFormalArguments(
1442 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1443 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1444 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1445 MachineFunction &MF = DAG.getMachineFunction();
1446 MachineFrameInfo &MFI = MF.getFrameInfo();
1447 MachineRegisterInfo &MRI = MF.getRegInfo();
1448 SystemZMachineFunctionInfo *FuncInfo =
1449 MF.getInfo<SystemZMachineFunctionInfo>();
1450 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1451 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1452
1453 // Detect unsupported vector argument types.
1454 if (Subtarget.hasVector())
1455 VerifyVectorTypes(Ins);
1456
1457 // Assign locations to all of the incoming arguments.
1458 SmallVector<CCValAssign, 16> ArgLocs;
1459 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1460 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1461
1462 unsigned NumFixedGPRs = 0;
1463 unsigned NumFixedFPRs = 0;
1464 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1465 SDValue ArgValue;
1466 CCValAssign &VA = ArgLocs[I];
1467 EVT LocVT = VA.getLocVT();
1468 if (VA.isRegLoc()) {
1469 // Arguments passed in registers
1470 const TargetRegisterClass *RC;
1471 switch (LocVT.getSimpleVT().SimpleTy) {
1472 default:
1473 // Integers smaller than i64 should be promoted to i64.
1474 llvm_unreachable("Unexpected argument type");
1475 case MVT::i32:
1476 NumFixedGPRs += 1;
1477 RC = &SystemZ::GR32BitRegClass;
1478 break;
1479 case MVT::i64:
1480 NumFixedGPRs += 1;
1481 RC = &SystemZ::GR64BitRegClass;
1482 break;
1483 case MVT::f32:
1484 NumFixedFPRs += 1;
1485 RC = &SystemZ::FP32BitRegClass;
1486 break;
1487 case MVT::f64:
1488 NumFixedFPRs += 1;
1489 RC = &SystemZ::FP64BitRegClass;
1490 break;
1491 case MVT::f128:
1492 NumFixedFPRs += 2;
1493 RC = &SystemZ::FP128BitRegClass;
1494 break;
1495 case MVT::v16i8:
1496 case MVT::v8i16:
1497 case MVT::v4i32:
1498 case MVT::v2i64:
1499 case MVT::v4f32:
1500 case MVT::v2f64:
1501 RC = &SystemZ::VR128BitRegClass;
1502 break;
1503 }
1504
1505 Register VReg = MRI.createVirtualRegister(RC);
1506 MRI.addLiveIn(VA.getLocReg(), VReg);
1507 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1508 } else {
1509 assert(VA.isMemLoc() && "Argument not register or memory");
1510
1511 // Create the frame index object for this incoming parameter.
1512 // FIXME: Pre-include call frame size in the offset, should not
1513 // need to manually add it here.
1514 int64_t ArgSPOffset = VA.getLocMemOffset();
1515 if (Subtarget.isTargetXPLINK64()) {
1516 auto &XPRegs =
1517 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
1518 ArgSPOffset += XPRegs.getCallFrameSize();
1519 }
1520 int FI =
1521 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1522
1523 // Create the SelectionDAG nodes corresponding to a load
1524 // from this parameter. Unpromoted ints and floats are
1525 // passed as right-justified 8-byte values.
1526 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1527 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1528 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1529 DAG.getIntPtrConstant(4, DL));
1530 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1531 MachinePointerInfo::getFixedStack(MF, FI));
1532 }
1533
1534 // Convert the value of the argument register into the value that's
1535 // being passed.
1536 if (VA.getLocInfo() == CCValAssign::Indirect) {
1537 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1538 MachinePointerInfo()));
1539 // If the original argument was split (e.g. i128), we need
1540 // to load all parts of it here (using the same address).
1541 unsigned ArgIndex = Ins[I].OrigArgIndex;
1542 assert (Ins[I].PartOffset == 0);
1543 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1544 CCValAssign &PartVA = ArgLocs[I + 1];
1545 unsigned PartOffset = Ins[I + 1].PartOffset;
1546 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1547 DAG.getIntPtrConstant(PartOffset, DL));
1548 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1549 MachinePointerInfo()));
1550 ++I;
1551 }
1552 } else
1553 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1554 }
1555
1556 // FIXME: Add support for lowering varargs for XPLINK64 in a later patch.
1557 if (IsVarArg && Subtarget.isTargetELF()) {
1558 // Save the number of non-varargs registers for later use by va_start, etc.
1559 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1560 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1561
1562 // Likewise the address (in the form of a frame index) of where the
1563 // first stack vararg would be. The 1-byte size here is arbitrary.
1564 int64_t StackSize = CCInfo.getNextStackOffset();
1565 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
1566
1567 // ...and a similar frame index for the caller-allocated save area
1568 // that will be used to store the incoming registers.
1569 int64_t RegSaveOffset =
1570 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1571 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1572 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1573
1574 // Store the FPR varargs in the reserved frame slots. (We store the
1575 // GPRs as part of the prologue.)
1576 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1577 SDValue MemOps[SystemZ::ELFNumArgFPRs];
1578 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1579 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1580 int FI =
1581 MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
1582 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1583 Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
1584 &SystemZ::FP64BitRegClass);
1585 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1586 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1587 MachinePointerInfo::getFixedStack(MF, FI));
1588 }
1589 // Join the stores, which are independent of one another.
1590 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1591 makeArrayRef(&MemOps[NumFixedFPRs],
1592 SystemZ::ELFNumArgFPRs-NumFixedFPRs));
1593 }
1594 }
1595
1596 // FIXME: For XPLINK64, add support for handling the incoming "ADA" special
1597 // register (R5).
1598 return Chain;
1599}
1600
1601static bool canUseSiblingCall(const CCState &ArgCCInfo,
1602 SmallVectorImpl<CCValAssign> &ArgLocs,
1603 SmallVectorImpl<ISD::OutputArg> &Outs) {
1604 // Punt if there are any indirect or stack arguments, or if the call
1605 // needs the callee-saved argument register R6, or if the call uses
1606 // the callee-saved register arguments SwiftSelf and SwiftError.
1607 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1608 CCValAssign &VA = ArgLocs[I];
1609 if (VA.getLocInfo() == CCValAssign::Indirect)
1610 return false;
1611 if (!VA.isRegLoc())
1612 return false;
1613 Register Reg = VA.getLocReg();
1614 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1615 return false;
1616 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1617 return false;
1618 }
1619 return true;
1620}
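// The R6 check above reflects the ELF ABI: R6 carries the fifth GPR argument
// but is also callee-saved, so an argument landing in R6 (or its 32-bit
// halves R6L/R6H) rules out the sibling-call optimization.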
1621
1622SDValue
1623SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
1624 SmallVectorImpl<SDValue> &InVals) const {
1625 SelectionDAG &DAG = CLI.DAG;
1626 SDLoc &DL = CLI.DL;
1627 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1628 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1629 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1630 SDValue Chain = CLI.Chain;
1631 SDValue Callee = CLI.Callee;
1632 bool &IsTailCall = CLI.IsTailCall;
1633 CallingConv::ID CallConv = CLI.CallConv;
1634 bool IsVarArg = CLI.IsVarArg;
1635 MachineFunction &MF = DAG.getMachineFunction();
1636 EVT PtrVT = getPointerTy(MF.getDataLayout());
1637 LLVMContext &Ctx = *DAG.getContext();
1638 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
1639
1640 // FIXME: z/OS support to be added in a later patch.
1641 if (Subtarget.isTargetXPLINK64())
1642 IsTailCall = false;
1643
1644 // Detect unsupported vector argument and return types.
1645 if (Subtarget.hasVector()) {
1646 VerifyVectorTypes(Outs);
1647 VerifyVectorTypes(Ins);
1648 }
1649
1650 // Analyze the operands of the call, assigning locations to each operand.
1651 SmallVector<CCValAssign, 16> ArgLocs;
1652 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1653 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1654
1655 // We don't support GuaranteedTailCallOpt, only automatically-detected
1656 // sibling calls.
1657 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1658 IsTailCall = false;
1659
1660 // Get a count of how many bytes are to be pushed on the stack.
1661 unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1662
1663 if (Subtarget.isTargetXPLINK64())
1664 // Although the XPLINK specifications for AMODE64 state that the minimum
1665 // size of the param area is 32 bytes and no rounding is otherwise
1666 // specified, we round this area up in 64-byte increments to be compatible
1667 // with existing compilers.
1668 NumBytes = std::max(64U, (unsigned)alignTo(NumBytes, 64));
1669
1670 // Mark the start of the call.
1671 if (!IsTailCall)
1672 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1673
1674 // Copy argument values to their designated locations.
1675 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
1676 SmallVector<SDValue, 8> MemOpChains;
1677 SDValue StackPtr;
1678 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1679 CCValAssign &VA = ArgLocs[I];
1680 SDValue ArgValue = OutVals[I];
1681
1682 if (VA.getLocInfo() == CCValAssign::Indirect) {
1683 // Store the argument in a stack slot and pass its address.
1684 unsigned ArgIndex = Outs[I].OrigArgIndex;
1685 EVT SlotVT;
1686 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1687 // Allocate the full stack space for a promoted (and split) argument.
1688 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1689 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1690 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1691 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1692 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1693 } else {
1694 SlotVT = Outs[I].ArgVT;
1695 }
1696 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1697 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1698 MemOpChains.push_back(
1699 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1700 MachinePointerInfo::getFixedStack(MF, FI)));
1701 // If the original argument was split (e.g. i128), we need
1702 // to store all parts of it here (and pass just one address).
1703 assert (Outs[I].PartOffset == 0);
1704 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1705 SDValue PartValue = OutVals[I + 1];
1706 unsigned PartOffset = Outs[I + 1].PartOffset;
1707 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1708 DAG.getIntPtrConstant(PartOffset, DL));
1709 MemOpChains.push_back(
1710 DAG.getStore(Chain, DL, PartValue, Address,
1711 MachinePointerInfo::getFixedStack(MF, FI)));
1712 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1713 SlotVT.getStoreSize()) && "Not enough space for argument part!");
1714 ++I;
1715 }
1716 ArgValue = SpillSlot;
1717 } else
1718 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1719
1720 if (VA.isRegLoc()) {
1721 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcast to an
1722 // MVT::i128 type. We decompose the 128-bit type into a pair of its high
1723 // and low values.
1724 if (VA.getLocVT() == MVT::i128)
1725 ArgValue = lowerI128ToGR128(DAG, ArgValue);
1726 // Queue up the argument copies and emit them at the end.
1727 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1728 } else {
1729 assert(VA.isMemLoc() && "Argument not register or memory");
1730
1731 // Work out the address of the stack slot. Unpromoted ints and
1732 // floats are passed as right-justified 8-byte values.
1733 if (!StackPtr.getNode())
1734 StackPtr = DAG.getCopyFromReg(Chain, DL,
1735 Regs->getStackPointerRegister(), PtrVT);
1736 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
1737 VA.getLocMemOffset();
1738 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1739 Offset += 4;
1740 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1741 DAG.getIntPtrConstant(Offset, DL));
1742
1743 // Emit the store.
1744 MemOpChains.push_back(
1745 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1746
1747 // Although long doubles or vectors are passed on the stack when they are
1748 // vararg (non-fixed) arguments, if a long double or vector occupies the
1749 // third and fourth slots of the argument list, GPR3 should still shadow
1750 // the third slot of the argument list.
1751 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
1752 SDValue ShadowArgValue =
1753 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
1754 DAG.getIntPtrConstant(1, DL));
1755 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
1756 }
1757 }
1758 }
1759
1760 // Join the stores, which are independent of one another.
1761 if (!MemOpChains.empty())
1762 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1763
1764 // Accept direct calls by converting symbolic call addresses to the
1765 // associated Target* opcodes. Force %r1 to be used for indirect
1766 // tail calls.
1767 SDValue Glue;
1768 // FIXME: Add support for XPLINK using the ADA register.
1769 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1770 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1771 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1772 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1773 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1774 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1775 } else if (IsTailCall) {
1776 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1777 Glue = Chain.getValue(1);
1778 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1779 }
1780
1781 // Build a sequence of copy-to-reg nodes, chained and glued together.
1782 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1783 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1784 RegsToPass[I].second, Glue);
1785 Glue = Chain.getValue(1);
1786 }
1787
1788 // The first call operand is the chain and the second is the target address.
1789 SmallVector<SDValue, 8> Ops;
1790 Ops.push_back(Chain);
1791 Ops.push_back(Callee);
1792
1793 // Add argument registers to the end of the list so that they are
1794 // known live into the call.
1795 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1796 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1797 RegsToPass[I].second.getValueType()));
1798
1799 // Add a register mask operand representing the call-preserved registers.
1800 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1801 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1802 assert(Mask && "Missing call preserved mask for calling convention");
1803 Ops.push_back(DAG.getRegisterMask(Mask));
1804
1805 // Glue the call to the argument copies, if any.
1806 if (Glue.getNode())
1807 Ops.push_back(Glue);
1808
1809 // Emit the call.
1810 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1811 if (IsTailCall)
1812 return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
1813 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
1814 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
1815 Glue = Chain.getValue(1);
1816
1817 // Mark the end of the call, which is glued to the call itself.
1818 Chain = DAG.getCALLSEQ_END(Chain,
1819 DAG.getConstant(NumBytes, DL, PtrVT, true),
1820 DAG.getConstant(0, DL, PtrVT, true),
1821 Glue, DL);
1822 Glue = Chain.getValue(1);
1823
1824 // Assign locations to each value returned by this call.
1825 SmallVector<CCValAssign, 16> RetLocs;
1826 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
1827 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
1828
1829 // Copy all of the result registers out of their specified physreg.
1830 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1831 CCValAssign &VA = RetLocs[I];
1832
1833 // Copy the value out, gluing the copy to the end of the call sequence.
1834 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
1835 VA.getLocVT(), Glue);
1836 Chain = RetValue.getValue(1);
1837 Glue = RetValue.getValue(2);
1838
1839 // Convert the value of the return register into the value that's
1840 // being returned.
1841 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
1842 }
1843
1844 return Chain;
1845}
1846
1847// Generate a call taking the given operands as arguments and returning a
1848// result of type RetVT.
1849std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
1850 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
1851 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
1852 bool DoesNotReturn, bool IsReturnValueUsed) const {
1853 TargetLowering::ArgListTy Args;
1854 Args.reserve(Ops.size());
1855
1856 TargetLowering::ArgListEntry Entry;
1857 for (SDValue Op : Ops) {
1858 Entry.Node = Op;
1859 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1860 Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
1861 Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
1862 Args.push_back(Entry);
1863 }
1864
1865 SDValue Callee =
1866 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
1867
1868 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
1869 TargetLowering::CallLoweringInfo CLI(DAG);
1870 bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
1871 CLI.setDebugLoc(DL)
1872 .setChain(Chain)
1873 .setCallee(CallConv, RetTy, Callee, std::move(Args))
1874 .setNoReturn(DoesNotReturn)
1875 .setDiscardResult(!IsReturnValueUsed)
1876 .setSExtResult(SignExtend)
1877 .setZExtResult(!SignExtend);
1878 return LowerCallTo(CLI);
1879}
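// Sketch of a (hypothetical) use of makeExternalCall; the callee name and
// operands are placeholders, not an existing runtime routine:
//   SDValue Ops[] = { Src };
//   std::pair<SDValue, SDValue> CallResult =
//       makeExternalCall(Chain, DAG, "__some_helper", MVT::i64, Ops,
//                        CallingConv::C, /*IsSigned=*/true, DL,
//                        /*DoesNotReturn=*/false, /*IsReturnValueUsed=*/true);
//   // CallResult.first is the returned value, CallResult.second the new chain.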
1880
1881bool SystemZTargetLowering::
1882CanLowerReturn(CallingConv::ID CallConv,
1883 MachineFunction &MF, bool isVarArg,
1884 const SmallVectorImpl<ISD::OutputArg> &Outs,
1885 LLVMContext &Context) const {
1886 // Detect unsupported vector return types.
1887 if (Subtarget.hasVector())
1888 VerifyVectorTypes(Outs);
1889
1890 // Special case that we cannot easily detect in RetCC_SystemZ since
1891 // i128 is not a legal type.
1892 for (auto &Out : Outs)
1893 if (Out.ArgVT == MVT::i128)
1894 return false;
1895
1896 SmallVector<CCValAssign, 16> RetLocs;
1897 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1898 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1899}
1900
1901SDValue
1902SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1903 bool IsVarArg,
1904 const SmallVectorImpl<ISD::OutputArg> &Outs,
1905 const SmallVectorImpl<SDValue> &OutVals,
1906 const SDLoc &DL, SelectionDAG &DAG) const {
1907 MachineFunction &MF = DAG.getMachineFunction();
1908
1909 // Detect unsupported vector return types.
1910 if (Subtarget.hasVector())
1911 VerifyVectorTypes(Outs);
1912
1913 // Assign locations to each returned value.
1914 SmallVector<CCValAssign, 16> RetLocs;
1915 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1916 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1917
1918 // Quick exit for void returns
1919 if (RetLocs.empty())
1920 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1921
1922 if (CallConv == CallingConv::GHC)
1923 report_fatal_error("GHC functions return void only");
1924
1925 // Copy the result values into the output registers.
1926 SDValue Glue;
1927 SmallVector<SDValue, 4> RetOps;
1928 RetOps.push_back(Chain);
1929 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1930 CCValAssign &VA = RetLocs[I];
1931 SDValue RetValue = OutVals[I];
1932
1933 // Make the return register live on exit.
1934 assert(VA.isRegLoc() && "Can only return in registers!");
1935
1936 // Promote the value as required.
1937 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1938
1939 // Chain and glue the copies together.
1940 Register Reg = VA.getLocReg();
1941 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1942 Glue = Chain.getValue(1);
1943 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1944 }
1945
1946 // Update chain and glue.
1947 RetOps[0] = Chain;
1948 if (Glue.getNode())
1949 RetOps.push_back(Glue);
1950
1951 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
1952}
1953
1954// Return true if Op is an intrinsic node with chain that returns the CC value
1955// as its only (other) argument. Provide the associated SystemZISD opcode and
1956// the mask of valid CC values if so.
1957static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1958 unsigned &CCValid) {
1959 unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1960 switch (Id) {
1961 case Intrinsic::s390_tbegin:
1962 Opcode = SystemZISD::TBEGIN;
1963 CCValid = SystemZ::CCMASK_TBEGIN;
1964 return true;
1965
1966 case Intrinsic::s390_tbegin_nofloat:
1967 Opcode = SystemZISD::TBEGIN_NOFLOAT;
1968 CCValid = SystemZ::CCMASK_TBEGIN;
1969 return true;
1970
1971 case Intrinsic::s390_tend:
1972 Opcode = SystemZISD::TEND;
1973 CCValid = SystemZ::CCMASK_TEND;
1974 return true;
1975
1976 default:
1977 return false;
1978 }
1979}
1980
1981// Return true if Op is an intrinsic node without chain that returns the
1982// CC value as its final argument. Provide the associated SystemZISD
1983// opcode and the mask of valid CC values if so.
1984static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
1985 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1986 switch (Id) {
1987 case Intrinsic::s390_vpkshs:
1988 case Intrinsic::s390_vpksfs:
1989 case Intrinsic::s390_vpksgs:
1990 Opcode = SystemZISD::PACKS_CC;
1991 CCValid = SystemZ::CCMASK_VCMP;
1992 return true;
1993
1994 case Intrinsic::s390_vpklshs:
1995 case Intrinsic::s390_vpklsfs:
1996 case Intrinsic::s390_vpklsgs:
1997 Opcode = SystemZISD::PACKLS_CC;
1998 CCValid = SystemZ::CCMASK_VCMP;
1999 return true;
2000
2001 case Intrinsic::s390_vceqbs:
2002 case Intrinsic::s390_vceqhs:
2003 case Intrinsic::s390_vceqfs:
2004 case Intrinsic::s390_vceqgs:
2005 Opcode = SystemZISD::VICMPES;
2006 CCValid = SystemZ::CCMASK_VCMP;
2007 return true;
2008
2009 case Intrinsic::s390_vchbs:
2010 case Intrinsic::s390_vchhs:
2011 case Intrinsic::s390_vchfs:
2012 case Intrinsic::s390_vchgs:
2013 Opcode = SystemZISD::VICMPHS;
2014 CCValid = SystemZ::CCMASK_VCMP;
2015 return true;
2016
2017 case Intrinsic::s390_vchlbs:
2018 case Intrinsic::s390_vchlhs:
2019 case Intrinsic::s390_vchlfs:
2020 case Intrinsic::s390_vchlgs:
2021 Opcode = SystemZISD::VICMPHLS;
2022 CCValid = SystemZ::CCMASK_VCMP;
2023 return true;
2024
2025 case Intrinsic::s390_vtm:
2026 Opcode = SystemZISD::VTM;
2027 CCValid = SystemZ::CCMASK_VCMP;
2028 return true;
2029
2030 case Intrinsic::s390_vfaebs:
2031 case Intrinsic::s390_vfaehs:
2032 case Intrinsic::s390_vfaefs:
2033 Opcode = SystemZISD::VFAE_CC;
2034 CCValid = SystemZ::CCMASK_ANY;
2035 return true;
2036
2037 case Intrinsic::s390_vfaezbs:
2038 case Intrinsic::s390_vfaezhs:
2039 case Intrinsic::s390_vfaezfs:
2040 Opcode = SystemZISD::VFAEZ_CC;
2041 CCValid = SystemZ::CCMASK_ANY;
2042 return true;
2043
2044 case Intrinsic::s390_vfeebs:
2045 case Intrinsic::s390_vfeehs:
2046 case Intrinsic::s390_vfeefs:
2047 Opcode = SystemZISD::VFEE_CC;
2048 CCValid = SystemZ::CCMASK_ANY;
2049 return true;
2050
2051 case Intrinsic::s390_vfeezbs:
2052 case Intrinsic::s390_vfeezhs:
2053 case Intrinsic::s390_vfeezfs:
2054 Opcode = SystemZISD::VFEEZ_CC;
2055 CCValid = SystemZ::CCMASK_ANY;
2056 return true;
2057
2058 case Intrinsic::s390_vfenebs:
2059 case Intrinsic::s390_vfenehs:
2060 case Intrinsic::s390_vfenefs:
2061 Opcode = SystemZISD::VFENE_CC;
2062 CCValid = SystemZ::CCMASK_ANY;
2063 return true;
2064
2065 case Intrinsic::s390_vfenezbs:
2066 case Intrinsic::s390_vfenezhs:
2067 case Intrinsic::s390_vfenezfs:
2068 Opcode = SystemZISD::VFENEZ_CC;
2069 CCValid = SystemZ::CCMASK_ANY;
2070 return true;
2071
2072 case Intrinsic::s390_vistrbs:
2073 case Intrinsic::s390_vistrhs:
2074 case Intrinsic::s390_vistrfs:
2075 Opcode = SystemZISD::VISTR_CC;
2076 CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
2077 return true;
2078
2079 case Intrinsic::s390_vstrcbs:
2080 case Intrinsic::s390_vstrchs:
2081 case Intrinsic::s390_vstrcfs:
2082 Opcode = SystemZISD::VSTRC_CC;
2083 CCValid = SystemZ::CCMASK_ANY;
2084 return true;
2085
2086 case Intrinsic::s390_vstrczbs:
2087 case Intrinsic::s390_vstrczhs:
2088 case Intrinsic::s390_vstrczfs:
2089 Opcode = SystemZISD::VSTRCZ_CC;
2090 CCValid = SystemZ::CCMASK_ANY;
2091 return true;
2092
2093 case Intrinsic::s390_vstrsb:
2094 case Intrinsic::s390_vstrsh:
2095 case Intrinsic::s390_vstrsf:
2096 Opcode = SystemZISD::VSTRS_CC;
2097 CCValid = SystemZ::CCMASK_ANY;
2098 return true;
2099
2100 case Intrinsic::s390_vstrszb:
2101 case Intrinsic::s390_vstrszh:
2102 case Intrinsic::s390_vstrszf:
2103 Opcode = SystemZISD::VSTRSZ_CC;
2104 CCValid = SystemZ::CCMASK_ANY;
2105 return true;
2106
2107 case Intrinsic::s390_vfcedbs:
2108 case Intrinsic::s390_vfcesbs:
2109 Opcode = SystemZISD::VFCMPES;
2110 CCValid = SystemZ::CCMASK_VCMP;
2111 return true;
2112
2113 case Intrinsic::s390_vfchdbs:
2114 case Intrinsic::s390_vfchsbs:
2115 Opcode = SystemZISD::VFCMPHS;
2116 CCValid = SystemZ::CCMASK_VCMP;
2117 return true;
2118
2119 case Intrinsic::s390_vfchedbs:
2120 case Intrinsic::s390_vfchesbs:
2121 Opcode = SystemZISD::VFCMPHES;
2122 CCValid = SystemZ::CCMASK_VCMP;
2123 return true;
2124
2125 case Intrinsic::s390_vftcidb:
2126 case Intrinsic::s390_vftcisb:
2127 Opcode = SystemZISD::VFTCI;
2128 CCValid = SystemZ::CCMASK_VCMP;
2129 return true;
2130
2131 case Intrinsic::s390_tdc:
2132 Opcode = SystemZISD::TDC;
2133 CCValid = SystemZ::CCMASK_TDC;
2134 return true;
2135
2136 default:
2137 return false;
2138 }
2139}
2140
2141// Emit an intrinsic with chain and an explicit CC register result.
2142static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2143 unsigned Opcode) {
2144 // Copy all operands except the intrinsic ID.
2145 unsigned NumOps = Op.getNumOperands();
2146 SmallVector<SDValue, 6> Ops;
2147 Ops.reserve(NumOps - 1);
2148 Ops.push_back(Op.getOperand(0));
2149 for (unsigned I = 2; I < NumOps; ++I)
2150 Ops.push_back(Op.getOperand(I));
2151
2152 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2153 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2154 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2155 SDValue OldChain = SDValue(Op.getNode(), 1);
2156 SDValue NewChain = SDValue(Intr.getNode(), 1);
2157 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2158 return Intr.getNode();
2159}
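// Note: the node built above drops the intrinsic-ID operand, produces an
// explicit i32 CC result plus a chain, and rewires users of the original
// intrinsic's chain output to the new chain.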
2160
2161// Emit an intrinsic with an explicit CC register result.
2162static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2163 unsigned Opcode) {
2164 // Copy all operands except the intrinsic ID.
2165 unsigned NumOps = Op.getNumOperands();
2166 SmallVector<SDValue, 6> Ops;
2167 Ops.reserve(NumOps - 1);
2168 for (unsigned I = 1; I < NumOps; ++I)
2169 Ops.push_back(Op.getOperand(I));
2170
2171 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2172 return Intr.getNode();
2173}
2174
2175// CC is a comparison that will be implemented using an integer or
2176// floating-point comparison. Return the condition code mask for
2177// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2178// unsigned comparisons and clear for signed ones. In the floating-point
2179// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2180static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2181#define CONV(X) \
2182 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2183 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2184 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2185
2186 switch (CC) {
2187 default:
2188 llvm_unreachable("Invalid integer condition!");
2189
2190 CONV(EQ);
2191 CONV(NE);
2192 CONV(GT);
2193 CONV(GE);
2194 CONV(LT);
2195 CONV(LE);
2196
2197 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2198 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2199 }
2200#undef CONV
2201}
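// For example, CONV(EQ) above expands to three cases: ISD::SETEQ and
// ISD::SETOEQ both return SystemZ::CCMASK_CMP_EQ, while ISD::SETUEQ returns
// SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_EQ.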
2202
2203// If C can be converted to a comparison against zero, adjust the operands
2204// as necessary.
2205static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2206 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2207 return;
2208
2209 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2210 if (!ConstOp1)
2211 return;
2212
2213 int64_t Value = ConstOp1->getSExtValue();
2214 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2215 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2216 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2217 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2218 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2219 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2220 }
2221}
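// For example, a signed "X > -1" becomes "X >= 0" above (XOR-ing the CCMask
// with CCMASK_CMP_EQ turns GT into GE and LT into LE), and "X < 1" becomes
// "X <= 0", so later code can treat both as comparisons against zero.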
2222
2223// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2224// adjust the operands as necessary.
2225static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2226 Comparison &C) {
2227 // For us to make any changes, it must be a comparison between a single-use
2228 // load and a constant.
2229 if (!C.Op0.hasOneUse() ||
2230 C.Op0.getOpcode() != ISD::LOAD ||
2231 C.Op1.getOpcode() != ISD::Constant)
2232 return;
2233
2234 // We must have an 8- or 16-bit load.
2235 auto *Load = cast<LoadSDNode>(C.Op0);
2236 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2237 if ((NumBits != 8 && NumBits != 16) ||
2238 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2239 return;
2240
2241 // The load must be an extending one and the constant must be within the
2242 // range of the unextended value.
2243 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2244 uint64_t Value = ConstOp1->getZExtValue();
2245 uint64_t Mask = (1 << NumBits) - 1;
2246 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2247 // Make sure that ConstOp1 is in range of C.Op0.
2248 int64_t SignedValue = ConstOp1->getSExtValue();
2249 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2250 return;
2251 if (C.ICmpType != SystemZICMP::SignedOnly) {
2252 // Unsigned comparison between two sign-extended values is equivalent
2253 // to unsigned comparison between two zero-extended values.
2254 Value &= Mask;
2255 } else if (NumBits == 8) {
2256 // Try to treat the comparison as unsigned, so that we can use CLI.
2257 // Adjust CCMask and Value as necessary.
2258 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2259 // Test whether the high bit of the byte is set.
2260 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2261 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2262 // Test whether the high bit of the byte is clear.
2263 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2264 else
2265 // No instruction exists for this combination.
2266 return;
2267 C.ICmpType = SystemZICMP::UnsignedOnly;
2268 }
2269 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2270 if (Value > Mask)
2271 return;
2272 // If the constant is in range, we can use any comparison.
2273 C.ICmpType = SystemZICMP::Any;
2274 } else
2275 return;
2276
2277 // Make sure that the first operand is an i32 of the right extension type.
2278 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2279 ISD::SEXTLOAD :
2280 ISD::ZEXTLOAD);
2281 if (C.Op0.getValueType() != MVT::i32 ||
2282 Load->getExtensionType() != ExtType) {
2283 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2284 Load->getBasePtr(), Load->getPointerInfo(),
2285 Load->getMemoryVT(), Load->getAlignment(),
2286 Load->getMemOperand()->getFlags());
2287 // Update the chain uses.
2288 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2289 }
2290
2291 // Make sure that the second operand is an i32 with the right value.
2292 if (C.Op1.getValueType() != MVT::i32 ||
2293 Value != ConstOp1->getZExtValue())
2294 C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2295}
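// Worked example: a signed comparison of a sign-extending i8 load against 0
// with CCMASK_CMP_LT is rewritten above as an unsigned comparison against 127
// with CCMASK_CMP_GT ("is the sign bit of the byte set?"), which CLI can
// implement directly.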
2296
2297// Return true if Op is either an unextended load, or a load suitable
2298// for integer register-memory comparisons of type ICmpType.
2299static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2300 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2301 if (Load) {
2302 // There are no instructions to compare a register with a memory byte.
2303 if (Load->getMemoryVT() == MVT::i8)
2304 return false;
2305 // Otherwise decide on extension type.
2306 switch (Load->getExtensionType()) {
2307 case ISD::NON_EXTLOAD:
2308 return true;
2309 case ISD::SEXTLOAD:
2310 return ICmpType != SystemZICMP::UnsignedOnly;
2311 case ISD::ZEXTLOAD:
2312 return ICmpType != SystemZICMP::SignedOnly;
2313 default:
2314 break;
2315 }
2316 }
2317 return false;
2318}
2319
2320// Return true if it is better to swap the operands of C.
2321static bool shouldSwapCmpOperands(const Comparison &C) {
2322 // Leave f128 comparisons alone, since they have no memory forms.
2323 if (C.Op0.getValueType() == MVT::f128)
2324 return false;
2325
2326 // Always keep a floating-point constant second, since comparisons with
2327 // zero can use LOAD TEST and comparisons with other constants make a
2328 // natural memory operand.
2329 if (isa<ConstantFPSDNode>(C.Op1))
2330 return false;
2331
2332 // Never swap comparisons with zero since there are many ways to optimize
2333 // those later.
2334 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2335 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2336 return false;
2337
2338 // Also keep natural memory operands second if the loaded value is
2339 // only used here. Several comparisons have memory forms.
2340 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2341 return false;
2342
2343 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2344 // In that case we generally prefer the memory to be second.
2345 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2346 // The only exceptions are when the second operand is a constant and
2347 // we can use things like CHHSI.
2348 if (!ConstOp1)
2349 return true;
2350 // The unsigned memory-immediate instructions can handle 16-bit
2351 // unsigned integers.
2352 if (C.ICmpType != SystemZICMP::SignedOnly &&
2353 isUInt<16>(ConstOp1->getZExtValue()))
2354 return false;
2355 // The signed memory-immediate instructions can handle 16-bit
2356 // signed integers.
2357 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2358 isInt<16>(ConstOp1->getSExtValue()))
2359 return false;
2360 return true;
2361 }
2362
2363 // Try to promote the use of CGFR and CLGFR.
2364 unsigned Opcode0 = C.Op0.getOpcode();
2365 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2366 return true;
2367 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2368 return true;
2369 if (C.ICmpType != SystemZICMP::SignedOnly &&
2370 Opcode0 == ISD::AND &&
2371 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2372 cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
2373 return true;
2374
2375 return false;
2376}
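// The CGFR/CLGFR promotion above works because those instructions compare a
// 64-bit register against a 32-bit operand that the instruction itself sign-
// or zero-extends, so keeping the extended value as the second operand lets
// the extension be folded away.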
2377
2378// Check whether C tests for equality between X and Y and whether X - Y
2379// or Y - X is also computed. In that case it's better to compare the
2380// result of the subtraction against zero.
2381static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2382 Comparison &C) {
2383 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2384 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2385 for (SDNode *N : C.Op0->uses()) {
2386 if (N->getOpcode() == ISD::SUB &&
2387 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2388 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2389 C.Op0 = SDValue(N, 0);
2390 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2391 return;
2392 }
2393 }
2394 }
2395}
2396
2397// Check whether C compares a floating-point value with zero and if that
2398// floating-point value is also negated. In this case we can use the
2399// negation to set CC, so avoiding separate LOAD AND TEST and
2400// LOAD (NEGATIVE/COMPLEMENT) instructions.
2401static void adjustForFNeg(Comparison &C) {
2402 // This optimization is invalid for strict comparisons, since FNEG
2403 // does not raise any exceptions.
2404 if (C.Chain)
2405 return;
2406 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2407 if (C1 && C1->isZero()) {
2408 for (SDNode *N : C.Op0->uses()) {
2409 if (N->getOpcode() == ISD::FNEG) {
2410 C.Op0 = SDValue(N, 0);
2411 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2412 return;
2413 }
2414 }
2415 }
2416}
2417
2418// Check whether C compares (shl X, 32) with 0 and whether X is
2419// also sign-extended. In that case it is better to test the result
2420// of the sign extension using LTGFR.
2421//
2422// This case is important because InstCombine transforms a comparison
2423// with (sext (trunc X)) into a comparison with (shl X, 32).
2424static void adjustForLTGFR(Comparison &C) {
2425 // Check for a comparison between (shl X, 32) and 0.
2426 if (C.Op0.getOpcode() == ISD::SHL &&
2427 C.Op0.getValueType() == MVT::i64 &&
2428 C.Op1.getOpcode() == ISD::Constant &&
2429 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2430 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2431 if (C1 && C1->getZExtValue() == 32) {
2432 SDValue ShlOp0 = C.Op0.getOperand(0);
2433 // See whether X has any SIGN_EXTEND_INREG uses.
2434 for (SDNode *N : ShlOp0->uses()) {
2435 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2436 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2437 C.Op0 = SDValue(N, 0);
2438 return;
2439 }
2440 }
2441 }
2442 }
2443}
2444
2445// If C compares the truncation of an extending load, try to compare
2446// the untruncated value instead. This exposes more opportunities to
2447// reuse CC.
2448static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2449 Comparison &C) {
2450 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2451 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2452 C.Op1.getOpcode() == ISD::Constant &&
2453 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2454 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2455 if (L->getMemoryVT().getStoreSizeInBits().getFixedSize() <=
2456 C.Op0.getValueSizeInBits().getFixedSize()) {
2457 unsigned Type = L->getExtensionType();
2458 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2459 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2460 C.Op0 = C.Op0.getOperand(0);
2461 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2462 }
2463 }
2464 }
2465}
2466
2467// Return true if shift operation N has an in-range constant shift value.
2468// Store it in ShiftVal if so.
2469static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2470 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2471 if (!Shift)
2472 return false;
2473
2474 uint64_t Amount = Shift->getZExtValue();
2475 if (Amount >= N.getValueSizeInBits())
2476 return false;
2477
2478 ShiftVal = Amount;
2479 return true;
2480}
2481
2482// Check whether an AND with Mask is suitable for a TEST UNDER MASK
2483// instruction and whether the CC value is descriptive enough to handle
2484// a comparison of type Opcode between the AND result and CmpVal.
2485// CCMask says which comparison result is being tested and BitSize is
2486// the number of bits in the operands. If TEST UNDER MASK can be used,
2487// return the corresponding CC mask, otherwise return 0.
2488static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2489 uint64_t Mask, uint64_t CmpVal,
2490 unsigned ICmpType) {
2491 assert(Mask != 0 && "ANDs with zero should have been removed by now");
2492
2493 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2494 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2495 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2496 return 0;
2497
2498 // Work out the masks for the lowest and highest bits.
2499 unsigned HighShift = 63 - countLeadingZeros(Mask);
2500 uint64_t High = uint64_t(1) << HighShift;
2501 uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2502
2503 // Signed ordered comparisons are effectively unsigned if the sign
2504 // bit is dropped.
2505 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2506
2507 // Check for equality comparisons with 0, or the equivalent.
2508 if (CmpVal == 0) {
2509 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2510 return SystemZ::CCMASK_TM_ALL_0;
2511 if (CCMask == SystemZ::CCMASK_CMP_NE)
2512 return SystemZ::CCMASK_TM_SOME_1;
2513 }
2514 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2515 if (CCMask == SystemZ::CCMASK_CMP_LT)
2516 return SystemZ::CCMASK_TM_ALL_0;
2517 if (CCMask == SystemZ::CCMASK_CMP_GE)
2518 return SystemZ::CCMASK_TM_SOME_1;
2519 }
2520 if (EffectivelyUnsigned && CmpVal < Low) {
2521 if (CCMask == SystemZ::CCMASK_CMP_LE)
2522 return SystemZ::CCMASK_TM_ALL_0;
2523 if (CCMask == SystemZ::CCMASK_CMP_GT)
2524 return SystemZ::CCMASK_TM_SOME_1;
2525 }
2526
2527 // Check for equality comparisons with the mask, or the equivalent.
2528 if (CmpVal == Mask) {
2529 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2530 return SystemZ::CCMASK_TM_ALL_1;
2531 if (CCMask == SystemZ::CCMASK_CMP_NE)
2532 return SystemZ::CCMASK_TM_SOME_0;
2533 }
2534 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2535 if (CCMask == SystemZ::CCMASK_CMP_GT)
2536 return SystemZ::CCMASK_TM_ALL_1;
2537 if (CCMask == SystemZ::CCMASK_CMP_LE)
2538 return SystemZ::CCMASK_TM_SOME_0;
2539 }
2540 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2541 if (CCMask == SystemZ::CCMASK_CMP_GE)
2542 return SystemZ::CCMASK_TM_ALL_1;
2543 if (CCMask == SystemZ::CCMASK_CMP_LT)
2544 return SystemZ::CCMASK_TM_SOME_0;
2545 }
2546
2547 // Check for ordered comparisons with the top bit.
2548 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2549 if (CCMask == SystemZ::CCMASK_CMP_LE)
2550 return SystemZ::CCMASK_TM_MSB_0;
2551 if (CCMask == SystemZ::CCMASK_CMP_GT)
2552 return SystemZ::CCMASK_TM_MSB_1;
2553 }
2554 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2555 if (CCMask == SystemZ::CCMASK_CMP_LT)
2556 return SystemZ::CCMASK_TM_MSB_0;
2557 if (CCMask == SystemZ::CCMASK_CMP_GE)
2558 return SystemZ::CCMASK_TM_MSB_1;
2559 }
2560
2561 // If there are just two bits, we can do equality checks for Low and High
2562 // as well.
2563 if (Mask == Low + High) {
2564 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2565 return SystemZ::CCMASK_TM_MIXED_MSB_0;
2566 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2567 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2568 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2569 return SystemZ::CCMASK_TM_MIXED_MSB_1;
2570 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2571 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2572 }
2573
2574 // Looks like we've exhausted our options.
2575 return 0;
2576}
2577
2578// See whether C can be implemented as a TEST UNDER MASK instruction.
2579// Update the arguments with the TM version if so.
2580static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2581 Comparison &C) {
2582 // Check that we have a comparison with a constant.
2583 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2584 if (!ConstOp1)
2585 return;
2586 uint64_t CmpVal = ConstOp1->getZExtValue();
2587
2588 // Check whether the nonconstant input is an AND with a constant mask.
2589 Comparison NewC(C);
2590 uint64_t MaskVal;
2591 ConstantSDNode *Mask = nullptr;
2592 if (C.Op0.getOpcode() == ISD::AND) {
2593 NewC.Op0 = C.Op0.getOperand(0);
2594 NewC.Op1 = C.Op0.getOperand(1);
2595 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2596 if (!Mask)
2597 return;
2598 MaskVal = Mask->getZExtValue();
2599 } else {
2600 // There is no instruction to compare with a 64-bit immediate
2601 // so use TMHH instead if possible. We need an unsigned ordered
2602 // comparison with an i64 immediate.
2603 if (NewC.Op0.getValueType() != MVT::i64 ||
2604 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2605 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2606 NewC.ICmpType == SystemZICMP::SignedOnly)
2607 return;
2608 // Convert LE and GT comparisons into LT and GE.
2609 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2610 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2611 if (CmpVal == uint64_t(-1))
2612 return;
2613 CmpVal += 1;
2614 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2615 }
2616 // If the low N bits of Op1 are zero then the low N bits of Op0 can
2617 // be masked off without changing the result.
2618 MaskVal = -(CmpVal & -CmpVal);
2619 NewC.ICmpType = SystemZICMP::UnsignedOnly;
2620 }
2621 if (!MaskVal)
2622 return;
2623
2624 // Check whether the combination of mask, comparison value and comparison
2625 // type are suitable.
2626 unsigned BitSize = NewC.Op0.getValueSizeInBits();
2627 unsigned NewCCMask, ShiftVal;
2628 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2629 NewC.Op0.getOpcode() == ISD::SHL &&
2630 isSimpleShift(NewC.Op0, ShiftVal) &&
2631 (MaskVal >> ShiftVal != 0) &&
2632 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2633 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2634 MaskVal >> ShiftVal,
2635 CmpVal >> ShiftVal,
2636 SystemZICMP::Any))) {
2637 NewC.Op0 = NewC.Op0.getOperand(0);
2638 MaskVal >>= ShiftVal;
2639 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2640 NewC.Op0.getOpcode() == ISD::SRL &&
2641 isSimpleShift(NewC.Op0, ShiftVal) &&
2642 (MaskVal << ShiftVal != 0) &&
2643 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2644 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2645 MaskVal << ShiftVal,
2646 CmpVal << ShiftVal,
2647 SystemZICMP::UnsignedOnly))) {
2648 NewC.Op0 = NewC.Op0.getOperand(0);
2649 MaskVal <<= ShiftVal;
2650 } else {
2651 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2652 NewC.ICmpType);
2653 if (!NewCCMask)
2654 return;
2655 }
2656
2657 // Go ahead and make the change.
2658 C.Opcode = SystemZISD::TM;
2659 C.Op0 = NewC.Op0;
2660 if (Mask && Mask->getZExtValue() == MaskVal)
2661 C.Op1 = SDValue(Mask, 0);
2662 else
2663 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2664 C.CCValid = SystemZ::CCMASK_TM;
2665 C.CCMask = NewCCMask;
2666}
2667
2668// See whether the comparison argument contains a redundant AND
2669// and remove it if so. This sometimes happens due to the generic
2670// BRCOND expansion.
2671static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2672 Comparison &C) {
2673 if (C.Op0.getOpcode() != ISD::AND)
2674 return;
2675 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2676 if (!Mask)
2677 return;
2678 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
2679 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2680 return;
2681
2682 C.Op0 = C.Op0.getOperand(0);
2683}
2684
2685// Return a Comparison that tests the condition-code result of intrinsic
2686// node Call against constant integer CC using comparison code Cond.
2687// Opcode is the opcode of the SystemZISD operation for the intrinsic
2688// and CCValid is the set of possible condition-code results.
2689static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2690 SDValue Call, unsigned CCValid, uint64_t CC,
2691 ISD::CondCode Cond) {
2692 Comparison C(Call, SDValue(), SDValue());
2693 C.Opcode = Opcode;
2694 C.CCValid = CCValid;
2695 if (Cond == ISD::SETEQ)
2696 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2697 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2698 else if (Cond == ISD::SETNE)
2699 // ...and the inverse of that.
2700 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2701 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2702 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2703 // always true for CC>3.
2704 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2705 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2706 // ...and the inverse of that.
2707 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2708 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2709 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2710 // always true for CC>3.
2711 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2712 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2713 // ...and the inverse of that.
2714 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2715 else
2716 llvm_unreachable("Unexpected integer comparison type");
2717 C.CCMask &= CCValid;
2718 return C;
2719}
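// Illustrative note (added annotation, not part of the original source): in the
// 4-bit CC mask, bit 3 corresponds to CC==0 and bit 0 to CC==3. For example, a
// hypothetical call with Cond == ISD::SETLT, CC == 2 and CCValid == 0xf yields
// CCMask = (~0U << (4 - 2)) & 0xf = 0b1100, i.e. the comparison is true exactly
// when the intrinsic produces CC 0 or CC 1 (CC < 2).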
2720
2721 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2722static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2723 ISD::CondCode Cond, const SDLoc &DL,
2724 SDValue Chain = SDValue(),
2725 bool IsSignaling = false) {
2726 if (CmpOp1.getOpcode() == ISD::Constant) {
2727 assert(!Chain);
2728 uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2729 unsigned Opcode, CCValid;
2730 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2731 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2732 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2733 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2734 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2735 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2736 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2737 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2738 }
2739 Comparison C(CmpOp0, CmpOp1, Chain);
2740 C.CCMask = CCMaskForCondCode(Cond);
2741 if (C.Op0.getValueType().isFloatingPoint()) {
2742 C.CCValid = SystemZ::CCMASK_FCMP;
2743 if (!C.Chain)
2744 C.Opcode = SystemZISD::FCMP;
2745 else if (!IsSignaling)
2746 C.Opcode = SystemZISD::STRICT_FCMP;
2747 else
2748 C.Opcode = SystemZISD::STRICT_FCMPS;
2749 adjustForFNeg(C);
2750 } else {
2751 assert(!C.Chain);
2752 C.CCValid = SystemZ::CCMASK_ICMP;
2753 C.Opcode = SystemZISD::ICMP;
2754 // Choose the type of comparison. Equality and inequality tests can
2755 // use either signed or unsigned comparisons. The choice also doesn't
2756 // matter if both sign bits are known to be clear. In those cases we
2757 // want to give the main isel code the freedom to choose whichever
2758 // form fits best.
2759 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2760 C.CCMask == SystemZ::CCMASK_CMP_NE ||
2761 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2762 C.ICmpType = SystemZICMP::Any;
2763 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2764 C.ICmpType = SystemZICMP::UnsignedOnly;
2765 else
2766 C.ICmpType = SystemZICMP::SignedOnly;
2767 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2768 adjustForRedundantAnd(DAG, DL, C);
2769 adjustZeroCmp(DAG, DL, C);
2770 adjustSubwordCmp(DAG, DL, C);
2771 adjustForSubtraction(DAG, DL, C);
2772 adjustForLTGFR(C);
2773 adjustICmpTruncate(DAG, DL, C);
2774 }
2775
2776 if (shouldSwapCmpOperands(C)) {
2777 std::swap(C.Op0, C.Op1);
2778 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2779 }
2780
2781 adjustForTestUnderMask(DAG, DL, C);
2782 return C;
2783}
2784
2785// Emit the comparison instruction described by C.
2786static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2787 if (!C.Op1.getNode()) {
2788 SDNode *Node;
2789 switch (C.Op0.getOpcode()) {
2790 case ISD::INTRINSIC_W_CHAIN:
2791 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2792 return SDValue(Node, 0);
2793 case ISD::INTRINSIC_WO_CHAIN:
2794 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2795 return SDValue(Node, Node->getNumValues() - 1);
2796 default:
2797 llvm_unreachable("Invalid comparison operands");
2798 }
2799 }
2800 if (C.Opcode == SystemZISD::ICMP)
2801 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
2802 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
2803 if (C.Opcode == SystemZISD::TM) {
2804 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2805 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2806 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
2807 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
2808 }
2809 if (C.Chain) {
2810 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
2811 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
2812 }
2813 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
2814}
2815
2816// Implement a 32-bit *MUL_LOHI operation by extending both operands to
2817// 64 bits. Extend is the extension type to use. Store the high part
2818// in Hi and the low part in Lo.
2819static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2820 SDValue Op0, SDValue Op1, SDValue &Hi,
2821 SDValue &Lo) {
2822 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2823 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2824 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2825 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2826 DAG.getConstant(32, DL, MVT::i64));
2827 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2828 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2829}
2830
2831// Lower a binary operation that produces two VT results, one in each
2832// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2833// and Opcode performs the GR128 operation. Store the even register result
2834// in Even and the odd register result in Odd.
2835static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2836 unsigned Opcode, SDValue Op0, SDValue Op1,
2837 SDValue &Even, SDValue &Odd) {
2838 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
2839 bool Is32Bit = is32Bit(VT);
2840 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2841 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2842}
2843
2844// Return an i32 value that is 1 if the CC value produced by CCReg is
2845// in the mask CCMask and 0 otherwise. CC is known to have a value
2846// in CCValid, so other values can be ignored.
2847static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
2848 unsigned CCValid, unsigned CCMask) {
2849 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
2850 DAG.getConstant(0, DL, MVT::i32),
2851 DAG.getTargetConstant(CCValid, DL, MVT::i32),
2852 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
2853 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
2854}
2855
2856 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2857// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
2858// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
2859// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
2860// floating-point comparisons.
2861enum class CmpMode { Int, FP, StrictFP, SignalingFP };
2862static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
2863 switch (CC) {
2864 case ISD::SETOEQ:
2865 case ISD::SETEQ:
2866 switch (Mode) {
2867 case CmpMode::Int: return SystemZISD::VICMPE;
2868 case CmpMode::FP: return SystemZISD::VFCMPE;
2869 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
2870 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
2871 }
2872 llvm_unreachable("Bad mode");
2873
2874 case ISD::SETOGE:
2875 case ISD::SETGE:
2876 switch (Mode) {
2877 case CmpMode::Int: return 0;
2878 case CmpMode::FP: return SystemZISD::VFCMPHE;
2879 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
2880 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
2881 }
2882 llvm_unreachable("Bad mode");
2883
2884 case ISD::SETOGT:
2885 case ISD::SETGT:
2886 switch (Mode) {
2887 case CmpMode::Int: return SystemZISD::VICMPH;
2888 case CmpMode::FP: return SystemZISD::VFCMPH;
2889 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
2890 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
2891 }
2892 llvm_unreachable("Bad mode");
2893
2894 case ISD::SETUGT:
2895 switch (Mode) {
2896 case CmpMode::Int: return SystemZISD::VICMPHL;
2897 case CmpMode::FP: return 0;
2898 case CmpMode::StrictFP: return 0;
2899 case CmpMode::SignalingFP: return 0;
2900 }
2901 llvm_unreachable("Bad mode");
2902
2903 default:
2904 return 0;
2905 }
2906}
2907
2908// Return the SystemZISD vector comparison operation for CC or its inverse,
2909// or 0 if neither can be done directly. Indicate in Invert whether the
2910// result is for the inverse of CC. Mode is as above.
2911static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
2912 bool &Invert) {
2913 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2914 Invert = false;
2915 return Opcode;
2916 }
2917
2918 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
2919 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2920 Invert = true;
2921 return Opcode;
2922 }
2923
2924 return 0;
2925}
2926
2927// Return a v2f64 that contains the extended form of elements Start and Start+1
2928// of v4f32 value Op. If Chain is nonnull, return the strict form.
2929static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2930 SDValue Op, SDValue Chain) {
2931 int Mask[] = { Start, -1, Start + 1, -1 };
2932 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2933 if (Chain) {
2934 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
2935 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
2936 }
2937 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2938}
2939
2940// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2941// producing a result of type VT. If Chain is nonnull, return the strict form.
2942SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2943 const SDLoc &DL, EVT VT,
2944 SDValue CmpOp0,
2945 SDValue CmpOp1,
2946 SDValue Chain) const {
2947 // There is no hardware support for v4f32 (unless we have the vector
2948 // enhancements facility 1), so extend the vector into two v2f64s
2949 // and compare those.
2950 if (CmpOp0.getValueType() == MVT::v4f32 &&
2951 !Subtarget.hasVectorEnhancements1()) {
2952 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
2953 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
2954 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
2955 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
2956 if (Chain) {
2957 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
2958 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
2959 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
2960 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2961 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
2962 H1.getValue(1), L1.getValue(1),
2963 HRes.getValue(1), LRes.getValue(1) };
2964 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
2965 SDValue Ops[2] = { Res, NewChain };
2966 return DAG.getMergeValues(Ops, DL);
2967 }
2968 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
2969 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
2970 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2971 }
2972 if (Chain) {
2973 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
2974 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
2975 }
2976 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
2977}
2978
2979// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
2980// an integer mask of type VT. If Chain is nonnull, we have a strict
2981// floating-point comparison. If in addition IsSignaling is true, we have
2982// a strict signaling floating-point comparison.
2983SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
2984 const SDLoc &DL, EVT VT,
2985 ISD::CondCode CC,
2986 SDValue CmpOp0,
2987 SDValue CmpOp1,
2988 SDValue Chain,
2989 bool IsSignaling) const {
2990 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
2991 assert(!Chain || IsFP);
2992 assert(!IsSignaling || Chain);
2993 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
2994 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
2995 bool Invert = false;
2996 SDValue Cmp;
2997 switch (CC) {
2998 // Handle tests for order using (or (ogt y x) (oge x y)).
2999 case ISD::SETUO:
3000 Invert = true;
3001 LLVM_FALLTHROUGH;
3002 case ISD::SETO: {
3003 assert(IsFP && "Unexpected integer comparison");
3004 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3005 DL, VT, CmpOp1, CmpOp0, Chain);
3006 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3007 DL, VT, CmpOp0, CmpOp1, Chain);
3008 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3009 if (Chain)
3010 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3011 LT.getValue(1), GE.getValue(1));
3012 break;
3013 }
3014
3015 // Handle <> tests using (or (ogt y x) (ogt x y)).
3016 case ISD::SETUEQ:
3017 Invert = true;
3018 LLVM_FALLTHROUGH;
3019 case ISD::SETONE: {
3020 assert(IsFP && "Unexpected integer comparison");
3021 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3022 DL, VT, CmpOp1, CmpOp0, Chain);
3023 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3024 DL, VT, CmpOp0, CmpOp1, Chain);
3025 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3026 if (Chain)
3027 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3028 LT.getValue(1), GT.getValue(1));
3029 break;
3030 }
3031
3032 // Otherwise a single comparison is enough. It doesn't really
3033 // matter whether we try the inversion or the swap first, since
3034 // there are no cases where both work.
3035 default:
3036 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3037 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3038 else {
3039 CC = ISD::getSetCCSwappedOperands(CC);
3040 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3041 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3042 else
3043 llvm_unreachable("Unhandled comparison");
3044 }
3045 if (Chain)
3046 Chain = Cmp.getValue(1);
3047 break;
3048 }
3049 if (Invert) {
3050 SDValue Mask =
3051 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3052 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3053 }
3054 if (Chain && Chain.getNode() != Cmp.getNode()) {
3055 SDValue Ops[2] = { Cmp, Chain };
3056 Cmp = DAG.getMergeValues(Ops, DL);
3057 }
3058 return Cmp;
3059}
3060
3061SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3062 SelectionDAG &DAG) const {
3063 SDValue CmpOp0 = Op.getOperand(0);
3064 SDValue CmpOp1 = Op.getOperand(1);
3065 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3066 SDLoc DL(Op);
3067 EVT VT = Op.getValueType();
3068 if (VT.isVector())
3069 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3070
3071 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3072 SDValue CCReg = emitCmp(DAG, DL, C);
3073 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3074}
3075
3076SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3077 SelectionDAG &DAG,
3078 bool IsSignaling) const {
3079 SDValue Chain = Op.getOperand(0);
3080 SDValue CmpOp0 = Op.getOperand(1);
3081 SDValue CmpOp1 = Op.getOperand(2);
3082 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3083 SDLoc DL(Op);
3084 EVT VT = Op.getNode()->getValueType(0);
3085 if (VT.isVector()) {
3086 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3087 Chain, IsSignaling);
3088 return Res.getValue(Op.getResNo());
3089 }
3090
3091 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3092 SDValue CCReg = emitCmp(DAG, DL, C);
3093 CCReg->setFlags(Op->getFlags());
3094 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3095 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3096 return DAG.getMergeValues(Ops, DL);
3097}
3098
3099SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3100 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3101 SDValue CmpOp0 = Op.getOperand(2);
3102 SDValue CmpOp1 = Op.getOperand(3);
3103 SDValue Dest = Op.getOperand(4);
3104 SDLoc DL(Op);
3105
3106 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3107 SDValue CCReg = emitCmp(DAG, DL, C);
3108 return DAG.getNode(
3109 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3110 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3111 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3112}
3113
3114// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3115// allowing Pos and Neg to be wider than CmpOp.
3116static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3117 return (Neg.getOpcode() == ISD::SUB &&
3118 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3119 cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
3120 Neg.getOperand(1) == Pos &&
3121 (Pos == CmpOp ||
3122 (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3123 Pos.getOperand(0) == CmpOp)));
3124}
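// Illustrative note (added annotation, not part of the original source): this
// matches select shapes such as (x < 0) ? x : (0 - x), where Pos is x (possibly
// a sign_extend of CmpOp) and Neg is (0 - x); lowerSELECT_CC below then folds
// such selects into LOAD POSITIVE / LOAD NEGATIVE style absolute operations via
// getAbsolute.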
3125
3126// Return the absolute or negative absolute of Op; IsNegative decides which.
3127static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3128 bool IsNegative) {
3129 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3130 if (IsNegative)
3131 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3132 DAG.getConstant(0, DL, Op.getValueType()), Op);
3133 return Op;
3134}
3135
3136SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3137 SelectionDAG &DAG) const {
3138 SDValue CmpOp0 = Op.getOperand(0);
3139 SDValue CmpOp1 = Op.getOperand(1);
3140 SDValue TrueOp = Op.getOperand(2);
3141 SDValue FalseOp = Op.getOperand(3);
3142 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3143 SDLoc DL(Op);
3144
3145 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3146
3147 // Check for absolute and negative-absolute selections, including those
3148 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3149 // This check supplements the one in DAGCombiner.
3150 if (C.Opcode == SystemZISD::ICMP &&
3151 C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3152 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3153 C.Op1.getOpcode() == ISD::Constant &&
3154 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
3155 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3156 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3157 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3158 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3159 }
3160
3161 SDValue CCReg = emitCmp(DAG, DL, C);
3162 SDValue Ops[] = {TrueOp, FalseOp,
3163 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3164 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3165
3166 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3167}
3168
3169SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3170 SelectionDAG &DAG) const {
3171 SDLoc DL(Node);
3172 const GlobalValue *GV = Node->getGlobal();
3173 int64_t Offset = Node->getOffset();
3174 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3175 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3176
3177 SDValue Result;
3178 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3179 if (isInt<32>(Offset)) {
3180 // Assign anchors at 1<<12 byte boundaries.
3181 uint64_t Anchor = Offset & ~uint64_t(0xfff);
3182 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3183 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3184
3185 // The offset can be folded into the address if it is aligned to a
3186 // halfword.
3187 Offset -= Anchor;
3188 if (Offset != 0 && (Offset & 1) == 0) {
3189 SDValue Full =
3190 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3191 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3192 Offset = 0;
3193 }
3194 } else {
3195 // Conservatively load a constant offset greater than 32 bits into a
3196 // register below.
3197 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3198 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3199 }
3200 } else {
3201 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3202 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3203 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3204 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3205 }
3206
3207 // If there was a non-zero offset that we didn't fold, create an explicit
3208 // addition for it.
3209 if (Offset != 0)
3210 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3211 DAG.getConstant(Offset, DL, PtrVT));
3212
3213 return Result;
3214}
3215
3216SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3217 SelectionDAG &DAG,
3218 unsigned Opcode,
3219 SDValue GOTOffset) const {
3220 SDLoc DL(Node);
3221 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3222 SDValue Chain = DAG.getEntryNode();
3223 SDValue Glue;
3224
3225 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3226 CallingConv::GHC)
3227 report_fatal_error("In GHC calling convention TLS is not supported");
3228
3229 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3230 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3231 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3232 Glue = Chain.getValue(1);
3233 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3234 Glue = Chain.getValue(1);
3235
3236 // The first call operand is the chain and the second is the TLS symbol.
3237 SmallVector<SDValue, 8> Ops;
3238 Ops.push_back(Chain);
3239 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3240 Node->getValueType(0),
3241 0, 0));
3242
3243 // Add argument registers to the end of the list so that they are
3244 // known live into the call.
3245 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3246 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3247
3248 // Add a register mask operand representing the call-preserved registers.
3249 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3250 const uint32_t *Mask =
3251 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3252 assert(Mask && "Missing call preserved mask for calling convention");
3253 Ops.push_back(DAG.getRegisterMask(Mask));
3254
3255 // Glue the call to the argument copies.
3256 Ops.push_back(Glue);
3257
3258 // Emit the call.
3259 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3260 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3261 Glue = Chain.getValue(1);
3262
3263 // Copy the return value from %r2.
3264 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3265}
3266
3267SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3268 SelectionDAG &DAG) const {
3269 SDValue Chain = DAG.getEntryNode();
3270 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3271
3272 // The high part of the thread pointer is in access register 0.
3273 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3274 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3275
3276 // The low part of the thread pointer is in access register 1.
3277 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3278 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3279
3280 // Merge them into a single 64-bit address.
3281 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3282 DAG.getConstant(32, DL, PtrVT));
3283 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3284}
3285
3286SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3287 SelectionDAG &DAG) const {
3288 if (DAG.getTarget().useEmulatedTLS())
3289 return LowerToTLSEmulatedModel(Node, DAG);
3290 SDLoc DL(Node);
3291 const GlobalValue *GV = Node->getGlobal();
3292 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3293 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3294
3295 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3296 CallingConv::GHC)
3297 report_fatal_error("In GHC calling convention TLS is not supported");
3298
3299 SDValue TP = lowerThreadPointer(DL, DAG);
3300
3301 // Get the offset of GA from the thread pointer, based on the TLS model.
3302 SDValue Offset;
3303 switch (model) {
3304 case TLSModel::GeneralDynamic: {
3305 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3306 SystemZConstantPoolValue *CPV =
3307 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3308
3309 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3310 Offset = DAG.getLoad(
3311 PtrVT, DL, DAG.getEntryNode(), Offset,
3312 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3313
3314 // Call __tls_get_offset to retrieve the offset.
3315 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3316 break;
3317 }
3318
3319 case TLSModel::LocalDynamic: {
3320 // Load the GOT offset of the module ID.
3321 SystemZConstantPoolValue *CPV =
3322 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3323
3324 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3325 Offset = DAG.getLoad(
3326 PtrVT, DL, DAG.getEntryNode(), Offset,
3327 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3328
3329 // Call __tls_get_offset to retrieve the module base offset.
3330 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3331
3332 // Note: The SystemZLDCleanupPass will remove redundant computations
3333 // of the module base offset. Count total number of local-dynamic
3334 // accesses to trigger execution of that pass.
3335 SystemZMachineFunctionInfo* MFI =
3336 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3337 MFI->incNumLocalDynamicTLSAccesses();
3338
3339 // Add the per-symbol offset.
3340 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3341
3342 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3343 DTPOffset = DAG.getLoad(
3344 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3345 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3346
3347 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3348 break;
3349 }
3350
3351 case TLSModel::InitialExec: {
3352 // Load the offset from the GOT.
3353 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3354 SystemZII::MO_INDNTPOFF);
3355 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3356 Offset =
3357 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3358 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3359 break;
3360 }
3361
3362 case TLSModel::LocalExec: {
3363 // Force the offset into the constant pool and load it from there.
3364 SystemZConstantPoolValue *CPV =
3365 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3366
3367 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3368 Offset = DAG.getLoad(
3369 PtrVT, DL, DAG.getEntryNode(), Offset,
3370 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3371 break;
3372 }
3373 }
3374
3375 // Add the base and offset together.
3376 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3377}
3378
3379SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3380 SelectionDAG &DAG) const {
3381 SDLoc DL(Node);
3382 const BlockAddress *BA = Node->getBlockAddress();
3383 int64_t Offset = Node->getOffset();
3384 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3385
3386 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3387 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3388 return Result;
3389}
3390
3391SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3392 SelectionDAG &DAG) const {
3393 SDLoc DL(JT);
3394 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3395 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3396
3397 // Use LARL to load the address of the table.
3398 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3399}
3400
3401SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3402 SelectionDAG &DAG) const {
3403 SDLoc DL(CP);
3404 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3405
3406 SDValue Result;
3407 if (CP->isMachineConstantPoolEntry())
3408 Result =
3409 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3410 else
3411 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3412 CP->getOffset());
3413
3414 // Use LARL to load the address of the constant pool entry.
3415 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3416}
3417
3418SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3419 SelectionDAG &DAG) const {
3420 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
3421 MachineFunction &MF = DAG.getMachineFunction();
3422 MachineFrameInfo &MFI = MF.getFrameInfo();
3423 MFI.setFrameAddressIsTaken(true);
3424
3425 SDLoc DL(Op);
3426 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3427 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3428
3429 // By definition, the frame address is the address of the back chain. (In
3430 // the case of packed stack without backchain, return the address where the
3431 // backchain would have been stored. This will either be an unused space or
3432 // contain a saved register).
3433 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3434 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3435
3436 // FIXME The frontend should detect this case.
3437 if (Depth > 0) {
3438 report_fatal_error("Unsupported stack frame traversal count");
3439 }
3440
3441 return BackChain;
3442}
3443
3444SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3445 SelectionDAG &DAG) const {
3446 MachineFunction &MF = DAG.getMachineFunction();
3447 MachineFrameInfo &MFI = MF.getFrameInfo();
3448 MFI.setReturnAddressIsTaken(true);
3449
3450 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3451 return SDValue();
3452
3453 SDLoc DL(Op);
3454 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3455 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3456
3457 // FIXME The frontend should detect this case.
3458 if (Depth > 0) {
3459 report_fatal_error("Unsupported stack frame traversal count");
3460 }
3461
3462 // Return R14D, which has the return address. Mark it an implicit live-in.
3463 Register LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3464 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3465}
3466
3467SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3468 SelectionDAG &DAG) const {
3469 SDLoc DL(Op);
3470 SDValue In = Op.getOperand(0);
3471 EVT InVT = In.getValueType();
3472 EVT ResVT = Op.getValueType();
3473
3474 // Convert loads directly. This is normally done by DAGCombiner,
3475 // but we need this case for bitcasts that are created during lowering
3476 // and which are then lowered themselves.
3477 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3478 if (ISD::isNormalLoad(LoadN)) {
3479 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3480 LoadN->getBasePtr(), LoadN->getMemOperand());
3481 // Update the chain uses.
3482 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3483 return NewLoad;
3484 }
3485
3486 if (InVT == MVT::i32 && ResVT == MVT::f32) {
3487 SDValue In64;
3488 if (Subtarget.hasHighWord()) {
3489 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3490 MVT::i64);
3491 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3492 MVT::i64, SDValue(U64, 0), In);
3493 } else {
3494 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3495 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3496 DAG.getConstant(32, DL, MVT::i64));
3497 }
3498 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3499 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3500 DL, MVT::f32, Out64);
3501 }
3502 if (InVT == MVT::f32 && ResVT == MVT::i32) {
3503 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3504 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3505 MVT::f64, SDValue(U64, 0), In);
3506 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3507 if (Subtarget.hasHighWord())
3508 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3509 MVT::i32, Out64);
3510 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3511 DAG.getConstant(32, DL, MVT::i64));
3512 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3513 }
3514 llvm_unreachable("Unexpected bitcast combination");
3515}
3516
3517SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3518 SelectionDAG &DAG) const {
3519
3520 if (Subtarget.isTargetXPLINK64())
3521 return lowerVASTART_XPLINK(Op, DAG);
3522 else
3523 return lowerVASTART_ELF(Op, DAG);
3524}
3525
3526SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
3527 SelectionDAG &DAG) const {
3528 MachineFunction &MF = DAG.getMachineFunction();
3529 SystemZMachineFunctionInfo *FuncInfo =
3530 MF.getInfo<SystemZMachineFunctionInfo>();
3531
3532 SDLoc DL(Op);
3533
3534 // vastart just stores the address of the VarArgsFrameIndex slot into the
3535 // memory location argument.
3536 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3537 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3538 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3539 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
3540 MachinePointerInfo(SV));
3541}
3542
3543SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
3544 SelectionDAG &DAG) const {
3545 MachineFunction &MF = DAG.getMachineFunction();
3546 SystemZMachineFunctionInfo *FuncInfo =
3547 MF.getInfo<SystemZMachineFunctionInfo>();
3548 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3549
3550 SDValue Chain = Op.getOperand(0);
3551 SDValue Addr = Op.getOperand(1);
3552 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3553 SDLoc DL(Op);
3554
3555 // The initial values of each field.
3556 const unsigned NumFields = 4;
3557 SDValue Fields[NumFields] = {
3558 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3559 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3560 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3561 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3562 };
3563
3564 // Store each field into its respective slot.
3565 SDValue MemOps[NumFields];
3566 unsigned Offset = 0;
3567 for (unsigned I = 0; I < NumFields; ++I) {
3568 SDValue FieldAddr = Addr;
3569 if (Offset != 0)
3570 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3571 DAG.getIntPtrConstant(Offset, DL));
3572 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3573 MachinePointerInfo(SV, Offset));
3574 Offset += 8;
3575 }
3576 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3577}
3578
3579SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3580 SelectionDAG &DAG) const {
3581 SDValue Chain = Op.getOperand(0);
3582 SDValue DstPtr = Op.getOperand(1);
3583 SDValue SrcPtr = Op.getOperand(2);
3584 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3585 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3586 SDLoc DL(Op);
3587
3588 uint32_t Sz =
3589 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
3590 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
3591 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3592 /*isTailCall*/ false, MachinePointerInfo(DstSV),
3593 MachinePointerInfo(SrcSV));
3594}
3595
3596SDValue
3597SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
3598 SelectionDAG &DAG) const {
3599 if (Subtarget.isTargetXPLINK64())
3600 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
3601 else
3602 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
3603}
3604
3605SDValue
3606SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
3607 SelectionDAG &DAG) const {
3608 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3609 MachineFunction &MF = DAG.getMachineFunction();
3610 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3611 SDValue Chain = Op.getOperand(0);
3612 SDValue Size = Op.getOperand(1);
3613 SDValue Align = Op.getOperand(2);
3614 SDLoc DL(Op);
3615
3616 // If the user has set the no-realign-stack function attribute, ignore
3617 // alloca alignments.
3618 uint64_t AlignVal =
3619 (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3620
3621 uint64_t StackAlign = TFI->getStackAlignment();
3622 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3623 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3624
3625 SDValue NeededSpace = Size;
3626
3627 // Add extra space for alignment if needed.
3628 EVT PtrVT = getPointerTy(MF.getDataLayout());
3629 if (ExtraAlignSpace)
3630 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
3631 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3632
3633 bool IsSigned = false;
3634 bool DoesNotReturn = false;
3635 bool IsReturnValueUsed = false;
3636 EVT VT = Op.getValueType();
3637 SDValue AllocaCall =
3638 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, makeArrayRef(NeededSpace),
3639 CallingConv::C, IsSigned, DL, DoesNotReturn,
3640 IsReturnValueUsed)
3641 .first;
3642
3643 // Perform a CopyFromReg from %GPR4 (the stack pointer register), chaining and
3644 // gluing it to the end of the call in order to ensure it isn't broken up from
3645 // the call sequence.
3646 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
3647 Register SPReg = Regs.getStackPointerRegister();
3648 Chain = AllocaCall.getValue(1);
3649 SDValue Glue = AllocaCall.getValue(2);
3650 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
3651 Chain = NewSPRegNode.getValue(1);
3652
3653 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
3654 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
3655 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
3656
3657 // Dynamically realign if needed.
3658 if (ExtraAlignSpace) {
3659 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3660 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3661 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
3662 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
3663 }
3664
3665 SDValue Ops[2] = {Result, Chain};
3666 return DAG.getMergeValues(Ops, DL);
3667}
3668
3669SDValue
3670SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
3671 SelectionDAG &DAG) const {
3672 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3673 MachineFunction &MF = DAG.getMachineFunction();
3674 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3675 bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3676
3677 SDValue Chain = Op.getOperand(0);
3678 SDValue Size = Op.getOperand(1);
3679 SDValue Align = Op.getOperand(2);
3680 SDLoc DL(Op);
3681
3682 // If the user has set the no-realign-stack function attribute, ignore
3683 // alloca alignments.
3684 uint64_t AlignVal =
3685 (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3686
3687 uint64_t StackAlign = TFI->getStackAlignment();
3688 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3689 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3690
3691 Register SPReg = getStackPointerRegisterToSaveRestore();
3692 SDValue NeededSpace = Size;
3693
3694 // Get a reference to the stack pointer.
3695 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3696
3697 // If we need a backchain, save it now.
3698 SDValue Backchain;
3699 if (StoreBackchain)
3700 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
3701 MachinePointerInfo());
3702
3703 // Add extra space for alignment if needed.
3704 if (ExtraAlignSpace)
3705 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3706 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3707
3708 // Get the new stack pointer value.
3709 SDValue NewSP;
3710 if (hasInlineStackProbe(MF)) {
3711 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
3712 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
3713 Chain = NewSP.getValue(1);
3714 }
3715 else {
3716 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
3717 // Copy the new stack pointer back.
3718 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3719 }
3720
3721 // The allocated data lives above the 160 bytes allocated for the standard
3722 // frame, plus any outgoing stack arguments. We don't know how much that
3723 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3724 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3725 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3726
3727 // Dynamically realign if needed.
3728 if (RequiredAlign > StackAlign) {
3729 Result =
3730 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3731 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3732 Result =
3733 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3734 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3735 }
3736
3737 if (StoreBackchain)
3738 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
3739 MachinePointerInfo());
3740
3741 SDValue Ops[2] = { Result, Chain };
3742 return DAG.getMergeValues(Ops, DL);
3743}
3744
3745SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3746 SDValue Op, SelectionDAG &DAG) const {
3747 SDLoc DL(Op);
3748
3749 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3750}
3751
3752SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
3753 SelectionDAG &DAG) const {
3754 EVT VT = Op.getValueType();
3755 SDLoc DL(Op);
3756 SDValue Ops[2];
3757 if (is32Bit(VT))
3758 // Just do a normal 64-bit multiplication and extract the results.
3759 // We define this so that it can be used for constant division.
3760 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
3761 Op.getOperand(1), Ops[1], Ops[0]);
3762 else if (Subtarget.hasMiscellaneousExtensions2())
3763 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3764 // the high result in the even register. ISD::SMUL_LOHI is defined to
3765 // return the low half first, so the results are in reverse order.
3766 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
3767 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3768 else {
3769 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3770 //
3771 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3772 //
3773 // but using the fact that the upper halves are either all zeros
3774 // or all ones:
3775 //
3776 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3777 //
3778 // and grouping the right terms together since they are quicker than the
3779 // multiplication:
3780 //
3781 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
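     // Added annotation (not part of the original source): the substitution
     // works because each "upper half" lh and rh is either 0 or all ones (-1),
     // so a term like (lh * rl) is either 0 or -rl, which is exactly
     // -(lh & rl); the same argument turns (ll * rh) into -(ll & rh).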
3782 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
3783 SDValue LL = Op.getOperand(0);
3784 SDValue RL = Op.getOperand(1);
3785 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
3786 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
3787 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3788 // the high result in the even register. ISD::SMUL_LOHI is defined to
3789 // return the low half first, so the results are in reverse order.
3790 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3791 LL, RL, Ops[1], Ops[0]);
3792 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
3793 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
3794 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
3795 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
3796 }
3797 return DAG.getMergeValues(Ops, DL);
3798}
3799
3800SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3801 SelectionDAG &DAG) const {
3802 EVT VT = Op.getValueType();
3803 SDLoc DL(Op);
3804 SDValue Ops[2];
3805 if (is32Bit(VT))
3806 // Just do a normal 64-bit multiplication and extract the results.
3807 // We define this so that it can be used for constant division.
3808 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3809 Op.getOperand(1), Ops[1], Ops[0]);
3810 else
3811 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3812 // the high result in the even register. ISD::UMUL_LOHI is defined to
3813 // return the low half first, so the results are in reverse order.
3814 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3815 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3816 return DAG.getMergeValues(Ops, DL);
3817}
3818
3819SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3820 SelectionDAG &DAG) const {
3821 SDValue Op0 = Op.getOperand(0);
3822 SDValue Op1 = Op.getOperand(1);
3823 EVT VT = Op.getValueType();
3824 SDLoc DL(Op);
3825
3826 // We use DSGF for 32-bit division. This means the first operand must
3827 // always be 64-bit, and the second operand should be 32-bit whenever
3828 // that is possible, to improve performance.
3829 if (is32Bit(VT))
3830 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3831 else if (DAG.ComputeNumSignBits(Op1) > 32)
3832 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3833
3834 // DSG(F) returns the remainder in the even register and the
3835 // quotient in the odd register.
3836 SDValue Ops[2];
3837 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
3838 return DAG.getMergeValues(Ops, DL);
3839}
3840
3841SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3842 SelectionDAG &DAG) const {
3843 EVT VT = Op.getValueType();
3844 SDLoc DL(Op);
3845
3846 // DL(G) returns the remainder in the even register and the
3847 // quotient in the odd register.
3848 SDValue Ops[2];
3849 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
3850 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3851 return DAG.getMergeValues(Ops, DL);
3852}
3853
3854SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3855 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3856
3857 // Get the known-zero masks for each operand.
3858 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
3859 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
3860 DAG.computeKnownBits(Ops[1])};
3861
3862 // See if the upper 32 bits of one operand and the lower 32 bits of the
3863 // other are known zero. They are the low and high operands respectively.
3864 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
3865 Known[1].Zero.getZExtValue() };
3866 unsigned High, Low;
3867 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3868 High = 1, Low = 0;
3869 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3870 High = 0, Low = 1;
3871 else
3872 return Op;
3873
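// Illustrative note (added annotation, not part of the original source): a
// typical hypothetical match is (or (shl y, 32), (zero_extend x)): the shl has
// its low 32 bits known zero (the High operand) and the zero_extend has its
// high 32 bits known zero (the Low operand), so the OR can become an insertion
// of the low word into the high operand.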
3874 SDValue LowOp = Ops[Low];
3875 SDValue HighOp = Ops[High];
3876
3877 // If the high part is a constant, we're better off using IILH.
3878 if (HighOp.getOpcode() == ISD::Constant)
3879 return Op;
3880
3881 // If the low part is a constant that is outside the range of LHI,
3882 // then we're better off using IILF.
3883 if (LowOp.getOpcode() == ISD::Constant) {
3884 int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3885 if (!isInt<16>(Value))
3886 return Op;
3887 }
3888
3889 // Check whether the high part is an AND that doesn't change the
3890 // high 32 bits and just masks out low bits. We can skip it if so.
3891 if (HighOp.getOpcode() == ISD::AND &&
3892 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3893 SDValue HighOp0 = HighOp.getOperand(0);
3894 uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3895 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3896 HighOp = HighOp0;
3897 }
3898
3899 // Take advantage of the fact that all GR32 operations only change the
3900 // low 32 bits by truncating Low to an i32 and inserting it directly
3901 // using a subreg. The interesting cases are those where the truncation
3902 // can be folded.
3903 SDLoc DL(Op);
3904 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3905 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3906 MVT::i64, HighOp, Low32);
3907}
3908
3909// Lower SADDO/SSUBO/UADDO/USUBO nodes.
3910SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
3911 SelectionDAG &DAG) const {
3912 SDNode *N = Op.getNode();
3913 SDValue LHS = N->getOperand(0);
3914 SDValue RHS = N->getOperand(1);
3915 SDLoc DL(N);
3916 unsigned BaseOp = 0;
3917 unsigned CCValid = 0;
3918 unsigned CCMask = 0;
3919
3920 switch (Op.getOpcode()) {
3921 default: llvm_unreachable("Unknown instruction!");
3922 case ISD::SADDO:
3923 BaseOp = SystemZISD::SADDO;
3924 CCValid = SystemZ::CCMASK_ARITH;
3925 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3926 break;
3927 case ISD::SSUBO:
3928 BaseOp = SystemZISD::SSUBO;
3929 CCValid = SystemZ::CCMASK_ARITH;
3930 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3931 break;
3932 case ISD::UADDO:
3933 BaseOp = SystemZISD::UADDO;
3934 CCValid = SystemZ::CCMASK_LOGICAL;
3935 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3936 break;
3937 case ISD::USUBO:
3938 BaseOp = SystemZISD::USUBO;
3939 CCValid = SystemZ::CCMASK_LOGICAL;
3940 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3941 break;
3942 }
3943
3944 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
3945 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
3946
3947 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3948 if (N->getValueType(1) == MVT::i1)
3949 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3950
3951 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3952}
3953
3954static bool isAddCarryChain(SDValue Carry) {
3955 while (Carry.getOpcode() == ISD::ADDCARRY)
3956 Carry = Carry.getOperand(2);
3957 return Carry.getOpcode() == ISD::UADDO;
3958}
3959
3960static bool isSubBorrowChain(SDValue Carry) {
3961 while (Carry.getOpcode() == ISD::SUBCARRY)
3962 Carry = Carry.getOperand(2);
3963 return Carry.getOpcode() == ISD::USUBO;
3964}
3965
3966// Lower ADDCARRY/SUBCARRY nodes.
3967SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
3968 SelectionDAG &DAG) const {
3969
3970 SDNode *N = Op.getNode();
3971 MVT VT = N->getSimpleValueType(0);
3972
3973 // Let legalize expand this if it isn't a legal type yet.
3974 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3975 return SDValue();
3976
3977 SDValue LHS = N->getOperand(0);
3978 SDValue RHS = N->getOperand(1);
3979 SDValue Carry = Op.getOperand(2);
3980 SDLoc DL(N);
3981 unsigned BaseOp = 0;
3982 unsigned CCValid = 0;
3983 unsigned CCMask = 0;
3984
3985 switch (Op.getOpcode()) {
3986 default: llvm_unreachable("Unknown instruction!");
3987 case ISD::ADDCARRY:
3988 if (!isAddCarryChain(Carry))
3989 return SDValue();
3990
3991 BaseOp = SystemZISD::ADDCARRY;
3992 CCValid = SystemZ::CCMASK_LOGICAL;
3993 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3994 break;
3995 case ISD::SUBCARRY:
3996 if (!isSubBorrowChain(Carry))
3997 return SDValue();
3998
3999 BaseOp = SystemZISD::SUBCARRY;
4000 CCValid = SystemZ::CCMASK_LOGICAL;
4001 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4002 break;
4003 }
4004
4005 // Set the condition code from the carry flag.
4006 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4007 DAG.getConstant(CCValid, DL, MVT::i32),
4008 DAG.getConstant(CCMask, DL, MVT::i32));
4009
4010 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4011 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4012
4013 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4014 if (N->getValueType(1) == MVT::i1)
4015 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4016
4017 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4018}
4019
4020SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4021 SelectionDAG &DAG) const {
4022 EVT VT = Op.getValueType();
4023 SDLoc DL(Op);
4024 Op = Op.getOperand(0);
4025
4026 // Handle vector types via VPOPCT.
4027 if (VT.isVector()) {
4028 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4029 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4030 switch (VT.getScalarSizeInBits()) {
4031 case 8:
4032 break;
4033 case 16: {
4034 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4035 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4036 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4037 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4038 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4039 break;
4040 }
4041 case 32: {
4042 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4043 DAG.getConstant(0, DL, MVT::i32));
4044 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4045 break;
4046 }
4047 case 64: {
4048 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4049 DAG.getConstant(0, DL, MVT::i32));
4050 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4051 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4052 break;
4053 }
4054 default:
4055 llvm_unreachable("Unexpected type");
4056 }
4057 return Op;
4058 }
4059
4060 // Get the known-zero mask for the operand.
4061 KnownBits Known = DAG.computeKnownBits(Op);
4062 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
4063 if (NumSignificantBits == 0)
4064 return DAG.getConstant(0, DL, VT);
4065
4066 // Skip known-zero high parts of the operand.
4067 int64_t OrigBitSize = VT.getSizeInBits();
4068 int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
4069 BitSize = std::min(BitSize, OrigBitSize);
4070
4071 // The POPCNT instruction counts the number of set bits in each byte.
4072 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
4073 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
4074 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4075
4076 // Add up per-byte counts in a binary tree. All bits of Op at
4077 // position larger than BitSize remain zero throughout.
4078 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
4079 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
4080 if (BitSize != OrigBitSize)
4081 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
4082 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
4083 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4084 }
4085
4086 // Extract overall result from high byte.
4087 if (BitSize > 8)
4088 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4089 DAG.getConstant(BitSize - 8, DL, VT));
4090
4091 return Op;
4092}
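// [Illustrative aside, not part of the original source] A hedged scalar model
// of the binary-tree summation above for the full 64-bit case (BitSize ==
// OrigBitSize == 64, so no intermediate AND masking is needed). It assumes
// <cstdint> and the GCC/Clang builtin __builtin_popcountll; the helper name is
// hypothetical. Shifted copies are added so the totals accumulate in the top
// byte, which is then shifted down, mirroring the SHL/ADD/SRL sequence above.
static unsigned popcount64Model(uint64_t V) {
  uint64_t PerByte = 0;
  for (unsigned B = 0; B < 8; ++B)   // models SystemZISD::POPCNT on an i64
    PerByte |= uint64_t(__builtin_popcountll((V >> (8 * B)) & 0xff)) << (8 * B);
  for (unsigned I = 32; I >= 8; I /= 2)
    PerByte += PerByte << I;         // add up per-byte counts in a binary tree
  return unsigned(PerByte >> 56);    // extract overall result from high byte
}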
4093
4094SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4095 SelectionDAG &DAG) const {
4096 SDLoc DL(Op);
4097 AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
4098 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
4099 SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
4100 cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
4101
4102 // The only fence that needs an instruction is a sequentially-consistent
4103 // cross-thread fence.
4104 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4105 FenceSSID == SyncScope::System) {
4106 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
4107 Op.getOperand(0)),
4108 0);
4109 }
4110
4111 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4112 return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4113}
4114
4115// Op is an atomic load. Lower it into a normal volatile load.
4116SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
4117 SelectionDAG &DAG) const {
4118 auto *Node = cast<AtomicSDNode>(Op.getNode());
4119 return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
4120 Node->getChain(), Node->getBasePtr(),
4121 Node->getMemoryVT(), Node->getMemOperand());
4122}
4123
4124// Op is an atomic store. Lower it into a normal volatile store.
4125SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
4126 SelectionDAG &DAG) const {
4127 auto *Node = cast<AtomicSDNode>(Op.getNode());
4128 SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
4129 Node->getBasePtr(), Node->getMemoryVT(),
4130 Node->getMemOperand());
4131 // We have to enforce sequential consistency by performing a
4132 // serialization operation after the store.
4133 if (Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)
4134 Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
4135 MVT::Other, Chain), 0);
4136 return Chain;
4137}
4138
4139// Op is an 8-, 16- or 32-bit ATOMIC_LOAD_* operation. Lower the 8- and
4140// 16-bit cases into the fullword ATOMIC_LOADW_* operation given by Opcode.
4141SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
4142 SelectionDAG &DAG,
4143 unsigned Opcode) const {
4144 auto *Node = cast<AtomicSDNode>(Op.getNode());
4145
4146 // 32-bit operations need no code outside the main loop.
4147 EVT NarrowVT = Node->getMemoryVT();
4148 EVT WideVT = MVT::i32;
4149 if (NarrowVT == WideVT)
4150 return Op;
4151
4152 int64_t BitSize = NarrowVT.getSizeInBits();
4153 SDValue ChainIn = Node->getChain();
4154 SDValue Addr = Node->getBasePtr();
4155 SDValue Src2 = Node->getVal();
4156 MachineMemOperand *MMO = Node->getMemOperand();
4157 SDLoc DL(Node);
4158 EVT PtrVT = Addr.getValueType();
4159
4160 // Convert atomic subtracts of constants into additions.
4161 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4162 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4163 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
4164 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
4165 }
4166
4167 // Get the address of the containing word.
4168 SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4169 DAG.getConstant(-4, DL, PtrVT));
4170
4171 // Get the number of bits that the word must be rotated left in order
4172 // to bring the field to the top bits of a GR32.
4173 SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4174 DAG.getConstant(3, DL, PtrVT));
4175 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4176
4177 // Get the complementing shift amount, for rotating a field in the top
4178 // bits back to its proper position.
4179 SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4180 DAG.getConstant(0, DL, WideVT), BitShift);
4181
4182 // Extend the source operand to 32 bits and prepare it for the inner loop.
4183 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
4184 // operations require the source to be shifted in advance. (This shift
4185 // can be folded if the source is constant.) For AND and NAND, the lower
4186 // bits must be set, while for other opcodes they should be left clear.
4187 if (Opcode != SystemZISD::ATOMIC_SWAPW)
4188 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
4189 DAG.getConstant(32 - BitSize, DL, WideVT));
4190 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
4191 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
4192 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
4193 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
4194
4195 // Construct the ATOMIC_LOADW_* node.
4196 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
4197 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4198 DAG.getConstant(BitSize, DL, WideVT) };
4199 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4200 NarrowVT, MMO);
4201
4202 // Rotate the result of the final CS so that the field is in the lower
4203 // bits of a GR32, then truncate it.
4204 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
4205 DAG.getConstant(BitSize, DL, WideVT));
4206 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
4207
4208 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
4209 return DAG.getMergeValues(RetOps, DL);
4210}
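// [Illustrative aside, not part of the original source] The address arithmetic
// above, restated for plain integers (helper name is hypothetical; SystemZ is
// big-endian, so byte offset 0 is the most significant byte of the containing
// word). Only the low bits of a 32-bit rotate amount matter, so Addr * 8 acts
// like (Addr % 4) * 8:
static void subwordAtomicShifts(uint64_t Addr, uint64_t &AlignedAddr,
                                unsigned &BitShift, unsigned &NegBitShift) {
  AlignedAddr = Addr & ~uint64_t(3); // start of the containing 4-byte word
  BitShift = unsigned(Addr) * 8;     // left-rotate that brings the field to
                                     // the top bits of a GR32
  NegBitShift = 0u - BitShift;       // complementing rotate amount
}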
4211
4212// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations
4213// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
4214// operations into additions.
4215SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
4216 SelectionDAG &DAG) const {
4217 auto *Node = cast<AtomicSDNode>(Op.getNode());
4218 EVT MemVT = Node->getMemoryVT();
4219 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
4220 // A full-width operation.
4221 assert(Op.getValueType() == MemVT && "Mismatched VTs");
4222 SDValue Src2 = Node->getVal();
4223 SDValue NegSrc2;
4224 SDLoc DL(Src2);
4225
4226 if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
4227 // Use an addition if the operand is constant and either LAA(G) is
4228 // available or the negative value is in the range of A(G)FHI.
4229 int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
4230 if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
4231 NegSrc2 = DAG.getConstant(Value, DL, MemVT);
4232 } else if (Subtarget.hasInterlockedAccess1())
4233 // Use LAA(G) if available.
4234 NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
4235 Src2);
4236
4237 if (NegSrc2.getNode())
4238 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
4239 Node->getChain(), Node->getBasePtr(), NegSrc2,
4240 Node->getMemOperand());
4241
4242 // Use the node as-is.
4243 return Op;
4244 }
4245
4246 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
4247}
4248
4249// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
4250SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
4251 SelectionDAG &DAG) const {
4252 auto *Node = cast<AtomicSDNode>(Op.getNode());
4253 SDValue ChainIn = Node->getOperand(0);
4254 SDValue Addr = Node->getOperand(1);
4255 SDValue CmpVal = Node->getOperand(2);
4256 SDValue SwapVal = Node->getOperand(3);
4257 MachineMemOperand *MMO = Node->getMemOperand();
4258 SDLoc DL(Node);
4259
4260 // We have native support for 32-bit and 64-bit compare and swap, but we
4261 // still need to expand extracting the "success" result from the CC.
4262 EVT NarrowVT = Node->getMemoryVT();
4263 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4264 if (NarrowVT == WideVT) {
4265 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4266 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
4267 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
4268 DL, Tys, Ops, NarrowVT, MMO);
4269 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4270 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
4271
4272 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4273 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4274 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4275 return SDValue();
4276 }
4277
4278 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4279 // via a fullword ATOMIC_CMP_SWAPW operation.
4280 int64_t BitSize = NarrowVT.getSizeInBits();
4281 EVT PtrVT = Addr.getValueType();
4282
4283 // Get the address of the containing word.
4284 SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4285 DAG.getConstant(-4, DL, PtrVT));
4286
4287 // Get the number of bits that the word must be rotated left in order
4288 // to bring the field to the top bits of a GR32.
4289 SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4290 DAG.getConstant(3, DL, PtrVT));
4291 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4292
4293 // Get the complementing shift amount, for rotating a field in the top
4294 // bits back to its proper position.
4295 SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4296 DAG.getConstant(0, DL, WideVT), BitShift);
4297
4298 // Construct the ATOMIC_CMP_SWAPW node.
4299 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4300 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
4301 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
4302 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
4303 VTList, Ops, NarrowVT, MMO);
4304 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4305 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
4306
4307 // emitAtomicCmpSwapW() will zero extend the result (original value).
4308 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
4309 DAG.getValueType(NarrowVT));
4310 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
4311 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4312 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4313 return SDValue();
4314}
4315
4316MachineMemOperand::Flags
4317SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
4318 // Because of how we convert atomic_load and atomic_store to normal loads
4319 // and stores in the DAG, we need to ensure that the MMOs are marked
4320 // volatile, since DAGCombine hasn't been updated to account for atomic
4321 // but non-volatile loads. (See D57601)
4322 if (auto *SI = dyn_cast<StoreInst>(&I))
4323 if (SI->isAtomic())
4324 return MachineMemOperand::MOVolatile;
4325 if (auto *LI = dyn_cast<LoadInst>(&I))
4326 if (LI->isAtomic())
4327 return MachineMemOperand::MOVolatile;
4328 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
4329 if (AI->isAtomic())
4330 return MachineMemOperand::MOVolatile;
4331 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
4332 if (AI->isAtomic())
4333 return MachineMemOperand::MOVolatile;
4334 return MachineMemOperand::MONone;
4335}
4336
4337SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
4338 SelectionDAG &DAG) const {
4339 MachineFunction &MF = DAG.getMachineFunction();
4340 const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
4341 auto *Regs = Subtarget->getSpecialRegisters();
4342 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4343 report_fatal_error("Variable-sized stack allocations are not supported "
4344 "in GHC calling convention");
4345 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
4346 Regs->getStackPointerRegister(), Op.getValueType());
4347}
4348
4349SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
4350 SelectionDAG &DAG) const {
4351 MachineFunction &MF = DAG.getMachineFunction();
4352 const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
4353 auto *Regs = Subtarget->getSpecialRegisters();
4354 bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
4355
4356 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4357 report_fatal_error("Variable-sized stack allocations are not supported "
4358 "in GHC calling convention");
4359
4360 SDValue Chain = Op.getOperand(0);
4361 SDValue NewSP = Op.getOperand(1);
4362 SDValue Backchain;
4363 SDLoc DL(Op);
4364
4365 if (StoreBackchain) {
4366 SDValue OldSP = DAG.getCopyFromReg(
4367 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
4368 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4369 MachinePointerInfo());
4370 }
4371
4372 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
4373
4374 if (StoreBackchain)
4375 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4376 MachinePointerInfo());
4377
4378 return Chain;
4379}
4380
4381SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
4382 SelectionDAG &DAG) const {
4383 bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
4384 if (!IsData)
4385 // Just preserve the chain.
4386 return Op.getOperand(0);
4387
4388 SDLoc DL(Op);
4389 bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
4390 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
4391 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
4392 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
4393 Op.getOperand(1)};
4394 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
4395 Node->getVTList(), Ops,
4396 Node->getMemoryVT(), Node->getMemOperand());
4397}
4398
4399// Convert condition code in CCReg to an i32 value.
4400static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
4401 SDLoc DL(CCReg);
4402 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
4403 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
4404 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
4405}
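// [Illustrative aside, not part of the original source] IPM stores the 2-bit
// condition code in bits 28-29 of the 32-bit result (counting from the least
// significant bit, with the two bits above it zeroed), so the SRL by
// SystemZ::IPM_CC above yields a plain 0-3 value. A hypothetical scalar
// equivalent of getCCResult():
static unsigned ccFromIPM(uint32_t IPMValue) {
  return (IPMValue >> 28) & 3;  // mask is redundant when the top bits are
                                // zero, but keeps the model in range
}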
4406
4407SDValue
4408SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4409 SelectionDAG &DAG) const {
4410 unsigned Opcode, CCValid;
4411 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
4412 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
4413 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
4414 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
4415 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
4416 return SDValue();
4417 }
4418
4419 return SDValue();
4420}
4421
4422SDValue
4423SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4424 SelectionDAG &DAG) const {
4425 unsigned Opcode, CCValid;
4426 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
4427 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
4428 if (Op->getNumValues() == 1)
4429 return getCCResult(DAG, SDValue(Node, 0));
4430 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
4431 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
4432 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
4433 }
4434
4435 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4436 switch (Id) {
4437 case Intrinsic::thread_pointer:
4438 return lowerThreadPointer(SDLoc(Op), DAG);
4439
4440 case Intrinsic::s390_vpdi:
4441 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
4442 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4443
4444 case Intrinsic::s390_vperm:
4445 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
4446 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4447
4448 case Intrinsic::s390_vuphb:
4449 case Intrinsic::s390_vuphh:
4450 case Intrinsic::s390_vuphf:
4451 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
4452 Op.getOperand(1));
4453
4454 case Intrinsic::s390_vuplhb:
4455 case Intrinsic::s390_vuplhh:
4456 case Intrinsic::s390_vuplhf:
4457 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
4458 Op.getOperand(1));
4459
4460 case Intrinsic::s390_vuplb:
4461 case Intrinsic::s390_vuplhw:
4462 case Intrinsic::s390_vuplf:
4463 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
4464 Op.getOperand(1));
4465
4466 case Intrinsic::s390_vupllb:
4467 case Intrinsic::s390_vupllh:
4468 case Intrinsic::s390_vupllf:
4469 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
4470 Op.getOperand(1));
4471
4472 case Intrinsic::s390_vsumb:
4473 case Intrinsic::s390_vsumh:
4474 case Intrinsic::s390_vsumgh:
4475 case Intrinsic::s390_vsumgf:
4476 case Intrinsic::s390_vsumqf:
4477 case Intrinsic::s390_vsumqg:
4478 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
4479 Op.getOperand(1), Op.getOperand(2));
4480 }
4481
4482 return SDValue();
4483}
4484
4485namespace {
4486// Says that SystemZISD operation Opcode can be used to perform the equivalent
4487// of a VPERM with permute vector Bytes. If Opcode takes three operands,
4488// Operand is the constant third operand, otherwise it is the number of
4489// bytes in each element of the result.
4490struct Permute {
4491 unsigned Opcode;
4492 unsigned Operand;
4493 unsigned char Bytes[SystemZ::VectorBytes];
4494};
4495}
4496
4497static const Permute PermuteForms[] = {
4498 // VMRHG
4499 { SystemZISD::MERGE_HIGH, 8,
4500 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4501 // VMRHF
4502 { SystemZISD::MERGE_HIGH, 4,
4503 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4504 // VMRHH
4505 { SystemZISD::MERGE_HIGH, 2,
4506 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4507 // VMRHB
4508 { SystemZISD::MERGE_HIGH, 1,
4509 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4510 // VMRLG
4511 { SystemZISD::MERGE_LOW, 8,
4512 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4513 // VMRLF
4514 { SystemZISD::MERGE_LOW, 4,
4515 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4516 // VMRLH
4517 { SystemZISD::MERGE_LOW, 2,
4518 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4519 // VMRLB
4520 { SystemZISD::MERGE_LOW, 1,
4521 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4522 // VPKG
4523 { SystemZISD::PACK, 4,
4524 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4525 // VPKF
4526 { SystemZISD::PACK, 2,
4527 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4528 // VPKH
4529 { SystemZISD::PACK, 1,
4530 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4531 // VPDI V1, V2, 4 (low half of V1, high half of V2)
4532 { SystemZISD::PERMUTE_DWORDS, 4,
4533 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4534 // VPDI V1, V2, 1 (high half of V1, low half of V2)
4535 { SystemZISD::PERMUTE_DWORDS, 1,
4536 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4537};
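// [Illustrative aside, not part of the original source] The Bytes arrays above
// are VPERM-style selectors over the 32-byte concatenation of the two vector
// operands: values 0-15 pick bytes from the first operand, 16-31 from the
// second. A hedged reference model of that selection, useful for checking the
// tables by hand (helper name is hypothetical):
static void applyPermuteModel(const unsigned char Bytes[16],
                              const unsigned char V1[16],
                              const unsigned char V2[16],
                              unsigned char Result[16]) {
  for (unsigned I = 0; I < 16; ++I)
    Result[I] = Bytes[I] < 16 ? V1[Bytes[I]] : V2[Bytes[I] - 16];
}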
4538
4539// Called after matching a vector shuffle against a particular pattern.
4540// Both the original shuffle and the pattern have two vector operands.
4541// OpNos[0] is the operand of the original shuffle that should be used for
4542// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4543// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
4544// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4545// for operands 0 and 1 of the pattern.
4546static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
4547 if (OpNos[0] < 0) {
4548 if (OpNos[1] < 0)
4549 return false;
4550 OpNo0 = OpNo1 = OpNos[1];
4551 } else if (OpNos[1] < 0) {
4552 OpNo0 = OpNo1 = OpNos[0];
4553 } else {
4554 OpNo0 = OpNos[0];
4555 OpNo1 = OpNos[1];
4556 }
4557 return true;
4558}
4559
4560// Bytes is a VPERM-like permute vector, except that -1 is used for
4561// undefined bytes. Return true if the VPERM can be implemented using P.
4562// When returning true set OpNo0 to the VPERM operand that should be
4563// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
4564//
4565// For example, if swapping the VPERM operands allows P to match, OpNo0
4566// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
4567// operand, but rewriting it to use two duplicated operands allows it to
4568// match P, then OpNo0 and OpNo1 will be the same.
4569static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
4570 unsigned &OpNo0, unsigned &OpNo1) {
4571 int OpNos[] = { -1, -1 };
4572 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4573 int Elt = Bytes[I];
4574 if (Elt >= 0) {
4575 // Make sure that the two permute vectors use the same suboperand
4576 // byte number. Only the operand numbers (the high bits) are
4577 // allowed to differ.
4578 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
4579 return false;
4580 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
4581 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
4582 // Make sure that the operand mappings are consistent with previous
4583 // elements.
4584 if (OpNos[ModelOpNo] == 1 - RealOpNo)
4585 return false;
4586 OpNos[ModelOpNo] = RealOpNo;
4587 }
4588 }
4589 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4590}
4591
4592// As above, but search for a matching permute.
4593static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
4594 unsigned &OpNo0, unsigned &OpNo1) {
4595 for (auto &P : PermuteForms)
4596 if (matchPermute(Bytes, P, OpNo0, OpNo1))
4597 return &P;
4598 return nullptr;
4599}
4600
4601// Bytes is a VPERM-like permute vector, except that -1 is used for
4602// undefined bytes. This permute is an operand of an outer permute.
4603// See whether redistributing the -1 bytes gives a shuffle that can be
4604// implemented using P. If so, set Transform to a VPERM-like permute vector
4605// that, when applied to the result of P, gives the original permute in Bytes.
4606static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4607 const Permute &P,
4608 SmallVectorImpl<int> &Transform) {
4609 unsigned To = 0;
4610 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
4611 int Elt = Bytes[From];
4612 if (Elt < 0)
4613 // Byte number From of the result is undefined.
4614 Transform[From] = -1;
4615 else {
4616 while (P.Bytes[To] != Elt) {
4617 To += 1;
4618 if (To == SystemZ::VectorBytes)
4619 return false;
4620 }
4621 Transform[From] = To;
4622 }
4623 }
4624 return true;
4625}
4626
4627// As above, but search for a matching permute.
4628static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4629 SmallVectorImpl<int> &Transform) {
4630 for (auto &P : PermuteForms)
4631 if (matchDoublePermute(Bytes, P, Transform))
4632 return &P;
4633 return nullptr;
4634}
4635
4636// Convert the mask of the given shuffle op into a byte-level mask,
4637// as if it had type vNi8.
4638static bool getVPermMask(SDValue ShuffleOp,
4639 SmallVectorImpl<int> &Bytes) {
4640 EVT VT = ShuffleOp.getValueType();
4641 unsigned NumElements = VT.getVectorNumElements();
4642 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4643
4644 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
4645 Bytes.resize(NumElements * BytesPerElement, -1);
4646 for (unsigned I = 0; I < NumElements; ++I) {
4647 int Index = VSN->getMaskElt(I);
4648 if (Index >= 0)
4649 for (unsigned J = 0; J < BytesPerElement; ++J)
4650 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4651 }
4652 return true;
4653 }
4654 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
4655 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
4656 unsigned Index = ShuffleOp.getConstantOperandVal(1);
4657 Bytes.resize(NumElements * BytesPerElement, -1);
4658 for (unsigned I = 0; I < NumElements; ++I)
4659 for (unsigned J = 0; J < BytesPerElement; ++J)
4660 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4661 return true;
4662 }
4663 return false;
4664}
4665
4666// Bytes is a VPERM-like permute vector, except that -1 is used for
4667// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
4668// the result come from a contiguous sequence of bytes from one input.
4669// Set Base to the selector for the first byte if so.
4670static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
4671 unsigned BytesPerElement, int &Base) {
4672 Base = -1;
4673 for (unsigned I = 0; I < BytesPerElement; ++I) {
4674 if (Bytes[Start + I] >= 0) {
4675 unsigned Elem = Bytes[Start + I];
4676 if (Base < 0) {
4677 Base = Elem - I;
4678 // Make sure the bytes would come from one input operand.
4679 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
4680 return false;
4681 } else if (unsigned(Base) != Elem - I)
4682 return false;
4683 }
4684 }
4685 return true;
4686}
4687
4688// Bytes is a VPERM-like permute vector, except that -1 is used for
4689// undefined bytes. Return true if it can be performed using VSLDB.
4690// When returning true, set StartIndex to the shift amount and OpNo0
4691// and OpNo1 to the VPERM operands that should be used as the first
4692// and second shift operand respectively.
4693static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
4694 unsigned &StartIndex, unsigned &OpNo0,
4695 unsigned &OpNo1) {
4696 int OpNos[] = { -1, -1 };
4697 int Shift = -1;
4698 for (unsigned I = 0; I < 16; ++I) {
4699 int Index = Bytes[I];
4700 if (Index >= 0) {
4701 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
4702 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
4703 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
4704 if (Shift < 0)
4705 Shift = ExpectedShift;
4706 else if (Shift != ExpectedShift)
4707 return false;
4708 // Make sure that the operand mappings are consistent with previous
4709 // elements.
4710 if (OpNos[ModelOpNo] == 1 - RealOpNo)
4711 return false;
4712 OpNos[ModelOpNo] = RealOpNo;
4713 }
4714 }
4715 StartIndex = Shift;
4716 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4717}
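// [Illustrative aside, not part of the original source] VSLDB (SHL_DOUBLE)
// takes 16 consecutive bytes starting at StartIndex from the 32-byte
// concatenation of its two operands, which is the pattern the check above
// looks for: every defined byte I must select position (I + Shift) mod 16 of
// a consistently chosen operand. A minimal model of the selection, assuming
// Concat holds the first shift operand followed by the second (helper name is
// hypothetical):
static void shlDoubleModel(const unsigned char Concat[32], unsigned StartIndex,
                           unsigned char Result[16]) {
  for (unsigned I = 0; I < 16; ++I)
    Result[I] = Concat[StartIndex + I];  // StartIndex is in the range [0, 15]
}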
4718
4719// Create a node that performs P on operands Op0 and Op1, casting the
4720// operands to the appropriate type. The type of the result is determined by P.
4721static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4722 const Permute &P, SDValue Op0, SDValue Op1) {
4723 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
4724 // elements of a PACK are twice as wide as the outputs.
4725 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
4726 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
4727 P.Operand);
4728 // Cast both operands to the appropriate type.
4729 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
4730 SystemZ::VectorBytes / InBytes);
4731 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
4732 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
4733 SDValue Op;
4734 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
4735 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
4736 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
4737 } else if (P.Opcode == SystemZISD::PACK) {
4738 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
4739 SystemZ::VectorBytes / P.Operand);
4740 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
4741 } else {
4742 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
4743 }
4744 return Op;
4745}
4746
4747static bool isZeroVector(SDValue N) {
4748 if (N->getOpcode() == ISD::BITCAST)
4749 N = N->getOperand(0);
4750 if (N->getOpcode() == ISD::SPLAT_VECTOR)
4751 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
4752 return Op->getZExtValue() == 0;
4753 return ISD::isBuildVectorAllZeros(N.getNode());
4754}
4755
4756// Return the index of the zero/undef vector, or UINT32_MAX if not found.
4757static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
4758 for (unsigned I = 0; I < Num ; I++)
4759 if (isZeroVector(Ops[I]))
4760 return I;
4761 return UINT32_MAX;
4762}
4763
4764// Bytes is a VPERM-like permute vector, except that -1 is used for
4765// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
4766// VSLDB or VPERM.
4767static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4768 SDValue *Ops,
4769 const SmallVectorImpl<int> &Bytes) {
4770 for (unsigned I = 0; I < 2; ++I)
4771 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
4772
4773 // First see whether VSLDB can be used.
4774 unsigned StartIndex, OpNo0, OpNo1;
4775 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
4776 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
4777 Ops[OpNo1],
4778 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
4779
4780 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
4781 // eliminate a zero vector by reusing any zero index in the permute vector.
4782 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
4783 if (ZeroVecIdx != UINT32_MAX) {
4784 bool MaskFirst = true;
4785 int ZeroIdx = -1;
4786 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4787 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
4788 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
4789 if (OpNo == ZeroVecIdx && I == 0) {
4790 // If the first byte is zero, use mask as first operand.
4791 ZeroIdx = 0;
4792 break;
4793 }
4794 if (OpNo != ZeroVecIdx && Byte == 0) {
4795 // If mask contains a zero, use it by placing that vector first.
4796 ZeroIdx = I + SystemZ::VectorBytes;
4797 MaskFirst = false;
4798 break;
4799 }
4800 }
4801 if (ZeroIdx != -1) {
4802 SDValue IndexNodes[SystemZ::VectorBytes];
4803 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4804 if (Bytes[I] >= 0) {
4805 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
4806 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
4807 if (OpNo == ZeroVecIdx)
4808 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
4809 else {
4810 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
4811 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
4812 }
4813 } else
4814 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
4815 }
4816 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
4817 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
4818 if (MaskFirst)
4819 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
4820 Mask);
4821 else
4822 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
4823 Mask);
4824 }
4825 }
4826
4827 SDValue IndexNodes[SystemZ::VectorBytes];
4828 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4829 if (Bytes[I] >= 0)
4830 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
4831 else
4832 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
4833 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
4834 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
4835 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
4836}
4837
4838namespace {
4839// Describes a general N-operand vector shuffle.
4840struct GeneralShuffle {
4841 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
4842 void addUndef();
4843 bool add(SDValue, unsigned);
4844 SDValue getNode(SelectionDAG &, const SDLoc &);
4845 void tryPrepareForUnpack();
4846 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
4847 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
4848
4849 // The operands of the shuffle.
4850 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
4851
4852 // Index I is -1 if byte I of the result is undefined. Otherwise the
4853 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
4854 // Bytes[I] / SystemZ::VectorBytes.
4855 SmallVector<int, SystemZ::VectorBytes> Bytes;
4856
4857 // The type of the shuffle result.
4858 EVT VT;
4859
4860 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
4861 unsigned UnpackFromEltSize;
4862};
4863}
4864
4865// Add an extra undefined element to the shuffle.
4866void GeneralShuffle::addUndef() {
4867 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4868 for (unsigned I = 0; I < BytesPerElement; ++I)
4869 Bytes.push_back(-1);
4870}
4871
4872// Add an extra element to the shuffle, taking it from element Elem of Op.
4873// A null Op indicates a vector input whose value will be calculated later;
4874// there is at most one such input per shuffle and it always has the same
4875// type as the result. Aborts and returns false if the source vector elements
4876// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
4877// LLVM they become implicitly extended, but this is rare and not optimized.
4878bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
4879 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4880
4881 // The source vector can have wider elements than the result,
4882 // either through an explicit TRUNCATE or because of type legalization.
4883 // We want the least significant part.
4884 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
4885 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
4886
4887 // Return false if the source elements are smaller than their destination
4888 // elements.
4889 if (FromBytesPerElement < BytesPerElement)
4890 return false;
4891
4892 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
4893 (FromBytesPerElement - BytesPerElement));
4894
4895 // Look through things like shuffles and bitcasts.
4896 while (Op.getNode()) {
4897 if (Op.getOpcode() == ISD::BITCAST)
4898 Op = Op.getOperand(0);
4899 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
4900 // See whether the bytes we need come from a contiguous part of one
4901 // operand.
4902 SmallVector<int, SystemZ::VectorBytes> OpBytes;
4903 if (!getVPermMask(Op, OpBytes))
4904 break;
4905 int NewByte;
4906 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
4907 break;
4908 if (NewByte < 0) {
4909 addUndef();
4910 return true;
4911 }
4912 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
4913 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
4914 } else if (Op.isUndef()) {
4915 addUndef();
4916 return true;
4917 } else
4918 break;
4919 }
4920
4921 // Make sure that the source of the extraction is in Ops.
4922 unsigned OpNo = 0;
4923 for (; OpNo < Ops.size(); ++OpNo)
4924 if (Ops[OpNo] == Op)
4925 break;
4926 if (OpNo == Ops.size())
4927 Ops.push_back(Op);
4928
4929 // Add the element to Bytes.
4930 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
4931 for (unsigned I = 0; I < BytesPerElement; ++I)
4932 Bytes.push_back(Base + I);
4933
4934 return true;
4935}
4936
4937// Return SDNodes for the completed shuffle.
4938SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
4939 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
4940
4941 if (Ops.size() == 0)
4942 return DAG.getUNDEF(VT);
4943
4944 // Use a single unpack if possible as the last operation.
4945 tryPrepareForUnpack();
4946
4947 // Make sure that there are at least two shuffle operands.
4948 if (Ops.size() == 1)
4949 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
4950
4951 // Create a tree of shuffles, deferring root node until after the loop.
4952 // Try to redistribute the undefined elements of non-root nodes so that
4953 // the non-root shuffles match something like a pack or merge, then adjust
4954 // the parent node's permute vector to compensate for the new order.
4955 // Among other things, this copes with vectors like <2 x i16> that were
4956 // padded with undefined elements during type legalization.
4957 //
4958 // In the best case this redistribution will lead to the whole tree
4959 // using packs and merges. It should rarely be a loss in other cases.
4960 unsigned Stride = 1;
4961 for (; Stride * 2 < Ops.size(); Stride *= 2) {
4962 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
4963 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
4964
4965 // Create a mask for just these two operands.
4966 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
4967 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
4968 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
4969 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
4970 if (OpNo == I)
4971 NewBytes[J] = Byte;
4972 else if (OpNo == I + Stride)
4973 NewBytes[J] = SystemZ::VectorBytes + Byte;
4974 else
4975 NewBytes[J] = -1;
4976 }
4977 // See if it would be better to reorganize NewMask to avoid using VPERM.
4978 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
4979 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
4980 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
4981 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
4982 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
4983 if (NewBytes[J] >= 0) {
4984 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
4985 "Invalid double permute");
4986 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
4987 } else
4988 assert(NewBytesMap[J] < 0 && "Invalid double permute");
4989 }
4990 } else {
4991 // Just use NewBytes on the operands.
4992 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
4993 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
4994 if (NewBytes[J] >= 0)
4995 Bytes[J] = I * SystemZ::VectorBytes + J;
4996 }
4997 }
4998 }
4999
5000 // Now we just have 2 inputs. Put the second operand in Ops[1].
5001 if (Stride > 1) {
5002 Ops[1] = Ops[Stride];
5003 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5004 if (Bytes[I] >= int(SystemZ::VectorBytes))
5005 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
5006 }
5007
5008 // Look for an instruction that can do the permute without resorting
5009 // to VPERM.
5010 unsigned OpNo0, OpNo1;
5011 SDValue Op;
5012 if (unpackWasPrepared() && Ops[1].isUndef())
5013 Op = Ops[0];
5014 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
5015 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
5016 else
5017 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
5018
5019 Op = insertUnpackIfPrepared(DAG, DL, Op);
5020
5021 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5022}
5023
5024#ifndef NDEBUG
5025static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
5026 dbgs() << Msg.c_str() << " { ";
5027 for (unsigned i = 0; i < Bytes.size(); i++)
5028 dbgs() << Bytes[i] << " ";
5029 dbgs() << "}\n";
5030}
5031#endif
5032
5033// If the Bytes vector matches an unpack operation, prepare to do the unpack
5034// after all else by removing the zero vector and the effect of the unpack on
5035// Bytes.
5036void GeneralShuffle::tryPrepareForUnpack() {
5037 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
5038 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
5039 return;
5040
5041 // Only do this if removing the zero vector reduces the depth, otherwise
5042 // the critical path will increase with the final unpack.
5043 if (Ops.size() > 2 &&
5044 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
5045 return;
5046
5047 // Find an unpack that would allow removing the zero vector from Ops.
5048 UnpackFromEltSize = 1;
5049 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
5050 bool MatchUnpack = true;
5051 SmallVector<int, SystemZ::VectorBytes> SrcBytes;
5052 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
5053 unsigned ToEltSize = UnpackFromEltSize * 2;
5054 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
5055 if (!IsZextByte)
5056 SrcBytes.push_back(Bytes[Elt]);
5057 if (Bytes[Elt] != -1) {
5058 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
5059 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
5060 MatchUnpack = false;
5061 break;
5062 }
5063 }
5064 }
5065 if (MatchUnpack) {
5066 if (Ops.size() == 2) {
5067 // Don't use unpack if a single source operand needs rearrangement.
5068 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
5069 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
5070 UnpackFromEltSize = UINT_MAX;
5071 return;
5072 }
5073 }
5074 break;
5075 }
5076 }
5077 if (UnpackFromEltSize > 4)
5078 return;
5079
5080 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
5081 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
5082 << ".\n";
5083 dumpBytes(Bytes, "Original Bytes vector:"););
5084
5085 // Apply the unpack in reverse to the Bytes array.
5086 unsigned B = 0;
5087 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
5088 Elt += UnpackFromEltSize;
5089 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
5090 Bytes[B] = Bytes[Elt];
5091 }
5092 while (B < SystemZ::VectorBytes)
5093 Bytes[B++] = -1;
5094
5095 // Remove the zero vector from Ops.
5096 Ops.erase(&Ops[ZeroVecOpNo]);
5097 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5098 if (Bytes[I] >= 0) {
5099 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5100 if (OpNo > ZeroVecOpNo)
5101 Bytes[I] -= SystemZ::VectorBytes;
5102 }
5103
5104 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
5105 dbgs() << "\n";);
5106}
5107
5108SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
5109 const SDLoc &DL,
5110 SDValue Op) {
5111 if (!unpackWasPrepared())
5112 return Op;
5113 unsigned InBits = UnpackFromEltSize * 8;
5114 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
5115 SystemZ::VectorBits / InBits);
5116 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
5117 unsigned OutBits = InBits * 2;
5118 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
5119 SystemZ::VectorBits / OutBits);
5120 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
5121}
5122
5123// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
5124static bool isScalarToVector(SDValue Op) {
5125 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
5126 if (!Op.getOperand(I).isUndef())
5127 return false;
5128 return true;
5129}
5130
5131// Return a vector of type VT that contains Value in the first element.
5132// The other elements don't matter.
5133static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5134 SDValue Value) {
5135 // If we have a constant, replicate it to all elements and let the
5136 // BUILD_VECTOR lowering take care of it.
5137 if (Value.getOpcode() == ISD::Constant ||
5138 Value.getOpcode() == ISD::ConstantFP) {
5139 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
5140 return DAG.getBuildVector(VT, DL, Ops);
5141 }
5142 if (Value.isUndef())
5143 return DAG.getUNDEF(VT);
5144 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
5145}
5146
5147// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
5148// element 1. Used for cases in which replication is cheap.
5149static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5150 SDValue Op0, SDValue Op1) {
5151 if (Op0.isUndef()) {
5152 if (Op1.isUndef())
5153 return DAG.getUNDEF(VT);
5154 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
5155 }
5156 if (Op1.isUndef())
5157 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
5158 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
5159 buildScalarToVector(DAG, DL, VT, Op0),
5160 buildScalarToVector(DAG, DL, VT, Op1));
5161}
5162
5163// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
5164// vector for them.
5165static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
5166 SDValue Op1) {
5167 if (Op0.isUndef() && Op1.isUndef())
5168 return DAG.getUNDEF(MVT::v2i64);
5169 // If one of the two inputs is undefined then replicate the other one,
5170 // in order to avoid using another register unnecessarily.
5171 if (Op0.isUndef())
5172 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5173 else if (Op1.isUndef())
5174 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5175 else {
5176 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5177 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5178 }
5179 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
5180}
5181
5182// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
5183// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
5184// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
5185// would benefit from this representation and return it if so.
5186static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
5187 BuildVectorSDNode *BVN) {
5188 EVT VT = BVN->getValueType(0);
5189 unsigned NumElements = VT.getVectorNumElements();
5190
5191 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
5192 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
5193 // need a BUILD_VECTOR, add an additional placeholder operand for that
5194 // BUILD_VECTOR and store its operands in ResidueOps.
5195 GeneralShuffle GS(VT);
5196 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
5197 bool FoundOne = false;
5198 for (unsigned I = 0; I < NumElements; ++I) {
5199 SDValue Op = BVN->getOperand(I);
5200 if (Op.getOpcode() == ISD::TRUNCATE)
5201 Op = Op.getOperand(0);
5202 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5203 Op.getOperand(1).getOpcode() == ISD::Constant) {
5204 unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
5205 if (!GS.add(Op.getOperand(0), Elem))
5206 return SDValue();
5207 FoundOne = true;
5208 } else if (Op.isUndef()) {
5209 GS.addUndef();
5210 } else {
5211 if (!GS.add(SDValue(), ResidueOps.size()))
5212 return SDValue();
5213 ResidueOps.push_back(BVN->getOperand(I));
5214 }
5215 }
5216
5217 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
5218 if (!FoundOne)
5219 return SDValue();
5220
5221 // Create the BUILD_VECTOR for the remaining elements, if any.
5222 if (!ResidueOps.empty()) {
5223 while (ResidueOps.size() < NumElements)
5224 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
5225 for (auto &Op : GS.Ops) {
5226 if (!Op.getNode()) {
5227 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
5228 break;
5229 }
5230 }
5231 }
5232 return GS.getNode(DAG, SDLoc(BVN));
5233}
5234
5235bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
5236 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
5237 return true;
5238 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
5239 return true;
5240 return false;
5241}
5242
5243// Combine GPR scalar values Elems into a vector of type VT.
5244SDValue
5245SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5246 SmallVectorImpl<SDValue> &Elems) const {
5247 // See whether there is a single replicated value.
5248 SDValue Single;
5249 unsigned int NumElements = Elems.size();
5250 unsigned int Count = 0;
5251 for (auto Elem : Elems) {
5252 if (!Elem.isUndef()) {
5253 if (!Single.getNode())
5254 Single = Elem;
5255 else if (Elem != Single) {
5256 Single = SDValue();
5257 break;
5258 }
5259 Count += 1;
5260 }
5261 }
5262 // There are three cases here:
5263 //
5264 // - if the only defined element is a loaded one, the best sequence
5265 // is a replicating load.
5266 //
5267 // - otherwise, if the only defined element is an i64 value, we will
5268 // end up with the same VLVGP sequence regardless of whether we short-cut
5269 // for replication or fall through to the later code.
5270 //
5271 // - otherwise, if the only defined element is an i32 or smaller value,
5272 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
5273 // This is only a win if the single defined element is used more than once.
5274 // In other cases we're better off using a single VLVGx.
5275 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
5276 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
5277
5278 // If all elements are loads, use VLREP/VLEs (below).
5279 bool AllLoads = true;
5280 for (auto Elem : Elems)
5281 if (!isVectorElementLoad(Elem)) {
5282 AllLoads = false;
5283 break;
5284 }
5285
5286 // The best way of building a v2i64 from two i64s is to use VLVGP.
5287 if (VT == MVT::v2i64 && !AllLoads)
5288 return joinDwords(DAG, DL, Elems[0], Elems[1]);
5289
5290 // Use a 64-bit merge high to combine two doubles.
5291 if (VT == MVT::v2f64 && !AllLoads)
5292 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5293
5294 // Build v4f32 values directly from the FPRs:
5295 //
5296 // <Axxx> <Bxxx> <Cxxx> <Dxxx>
5297 // V V VMRHF
5298 // <ABxx> <CDxx>
5299 // V VMRHG
5300 // <ABCD>
5301 if (VT == MVT::v4f32 && !AllLoads) {
5302 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5303 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
5304 // Avoid unnecessary undefs by reusing the other operand.
5305 if (Op01.isUndef())
5306 Op01 = Op23;
5307 else if (Op23.isUndef())
5308 Op23 = Op01;
5309 // Merging identical replications is a no-op.
5310 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
5311 return Op01;
5312 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
5313 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
5314 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
5315 DL, MVT::v2i64, Op01, Op23);
5316 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5317 }
5318
5319 // Collect the constant terms.
5320 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
5321 SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
5322
5323 unsigned NumConstants = 0;
5324 for (unsigned I = 0; I < NumElements; ++I) {
5325 SDValue Elem = Elems[I];
5326 if (Elem.getOpcode() == ISD::Constant ||
5327 Elem.getOpcode() == ISD::ConstantFP) {
5328 NumConstants += 1;
5329 Constants[I] = Elem;
5330 Done[I] = true;
5331 }
5332 }
5333 // If there was at least one constant, fill in the other elements of
5334 // Constants with undefs to get a full vector constant and use that
5335 // as the starting point.
5336 SDValue Result;
5337 SDValue ReplicatedVal;
5338 if (NumConstants > 0) {
5339 for (unsigned I = 0; I < NumElements; ++I)
5340 if (!Constants[I].getNode())
5341 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
5342 Result = DAG.getBuildVector(VT, DL, Constants);
5343 } else {
5344 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
5345 // avoid a false dependency on any previous contents of the vector
5346 // register.
5347
5348 // Use a VLREP if at least one element is a load. Make sure to replicate
5349 // the load whose value is used by the most elements.
5350 std::map<const SDNode*, unsigned> UseCounts;
5351 SDNode *LoadMaxUses = nullptr;
5352 for (unsigned I = 0; I < NumElements; ++I)
5353 if (isVectorElementLoad(Elems[I])) {
5354 SDNode *Ld = Elems[I].getNode();
5355 UseCounts[Ld]++;
5356 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
5357 LoadMaxUses = Ld;
5358 }
5359 if (LoadMaxUses != nullptr) {
5360 ReplicatedVal = SDValue(LoadMaxUses, 0);
5361 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
5362 } else {
5363 // Try to use VLVGP.
5364 unsigned I1 = NumElements / 2 - 1;
5365 unsigned I2 = NumElements - 1;
5366 bool Def1 = !Elems[I1].isUndef();
5367 bool Def2 = !Elems[I2].isUndef();
5368 if (Def1 || Def2) {
5369 SDValue Elem1 = Elems[Def1 ? I1 : I2];
5370 SDValue Elem2 = Elems[Def2 ? I2 : I1];
5371 Result = DAG.getNode(ISD::BITCAST, DL, VT,
5372 joinDwords(DAG, DL, Elem1, Elem2));
5373 Done[I1] = true;
5374 Done[I2] = true;
5375 } else
5376 Result = DAG.getUNDEF(VT);
5377 }
5378 }
5379
5380 // Use VLVGx to insert the other elements.
5381 for (unsigned I = 0; I < NumElements; ++I)
5382 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
5383 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
5384 DAG.getConstant(I, DL, MVT::i32));
5385 return Result;
5386}
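A minimal sketch of the VLVGP seeding above (vlvgpSeedIndices is a hypothetical standalone helper, not part of this file): the two elements chosen are the last element of each half of the vector, presumably because they end up in the low part of each doubleword on big-endian SystemZ.

// Mirrors the index choice in buildVector: e.g. elements 1 and 3 of a
// v4i32, or elements 3 and 7 of a v8i16, seed the VLVGP.
void vlvgpSeedIndices(unsigned NumElements, unsigned &I1, unsigned &I2) {
  I1 = NumElements / 2 - 1;
  I2 = NumElements - 1;
}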
5387
5388SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
5389 SelectionDAG &DAG) const {
5390 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
5391 SDLoc DL(Op);
5392 EVT VT = Op.getValueType();
5393
5394 if (BVN->isConstant()) {
5395 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
5396 return Op;
5397
5398 // Fall back to loading it from memory.
5399 return SDValue();
5400 }
5401
5402 // See if we should use shuffles to construct the vector from other vectors.
5403 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
5404 return Res;
5405
5406 // Detect SCALAR_TO_VECTOR conversions.
5407 if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
5408 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
5409
5410 // Otherwise use buildVector to build the vector up from GPRs.
5411 unsigned NumElements = Op.getNumOperands();
5412 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
5413 for (unsigned I = 0; I < NumElements; ++I)
5414 Ops[I] = Op.getOperand(I);
5415 return buildVector(DAG, DL, VT, Ops);
5416}
5417
5418SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
5419 SelectionDAG &DAG) const {
5420 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
5421 SDLoc DL(Op);
5422 EVT VT = Op.getValueType();
5423 unsigned NumElements = VT.getVectorNumElements();
5424
5425 if (VSN->isSplat()) {
5426 SDValue Op0 = Op.getOperand(0);
5427 unsigned Index = VSN->getSplatIndex();
5428 assert(Index < VT.getVectorNumElements() &&
5429 "Splat index should be defined and in first operand");
5430 // See whether the value we're splatting is directly available as a scalar.
5431 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5432 Op0.getOpcode() == ISD::BUILD_VECTOR)
5433 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
5434 // Otherwise keep it as a vector-to-vector operation.
5435 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
5436 DAG.getTargetConstant(Index, DL, MVT::i32));
5437 }
5438
5439 GeneralShuffle GS(VT);
5440 for (unsigned I = 0; I < NumElements; ++I) {
5441 int Elt = VSN->getMaskElt(I);
5442 if (Elt < 0)
5443 GS.addUndef();
5444 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
5445 unsigned(Elt) % NumElements))
5446 return SDValue();
5447 }
5448 return GS.getNode(DAG, SDLoc(VSN));
5449}
5450
5451SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
5452 SelectionDAG &DAG) const {
5453 SDLoc DL(Op);
5454 // Just insert the scalar into element 0 of an undefined vector.
5455 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
5456 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
5457 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
5458}
5459
5460SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
5461 SelectionDAG &DAG) const {
5462 // Handle insertions of floating-point values.
5463 SDLoc DL(Op);
5464 SDValue Op0 = Op.getOperand(0);
5465 SDValue Op1 = Op.getOperand(1);
5466 SDValue Op2 = Op.getOperand(2);
5467 EVT VT = Op.getValueType();
5468
5469 // Insertions into constant indices of a v2f64 can be done using VPDI.
5470 // However, if the inserted value is a bitcast or a constant then it's
5471 // better to use GPRs, as below.
5472 if (VT == MVT::v2f64 &&
5473 Op1.getOpcode() != ISD::BITCAST &&
5474 Op1.getOpcode() != ISD::ConstantFP &&
5475 Op2.getOpcode() == ISD::Constant) {
5476 uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
5477 unsigned Mask = VT.getVectorNumElements() - 1;
5478 if (Index <= Mask)
5479 return Op;
5480 }
5481
5482 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
5483 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
5484 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
5485 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
5486 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
5487 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
5488 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5489}
5490
5491SDValue
5492SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
5493 SelectionDAG &DAG) const {
5494 // Handle extractions of floating-point values.
5495 SDLoc DL(Op);
5496 SDValue Op0 = Op.getOperand(0);
5497 SDValue Op1 = Op.getOperand(1);
5498 EVT VT = Op.getValueType();
5499 EVT VecVT = Op0.getValueType();
5500
5501 // Extractions of constant indices can be done directly.
5502 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
5503 uint64_t Index = CIndexN->getZExtValue();
5504 unsigned Mask = VecVT.getVectorNumElements() - 1;
5505 if (Index <= Mask)
5506 return Op;
5507 }
5508
5509 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
5510 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
5511 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
5512 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
5513 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
5514 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5515}
5516
5517SDValue SystemZTargetLowering::
5518lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5519 SDValue PackedOp = Op.getOperand(0);
5520 EVT OutVT = Op.getValueType();
5521 EVT InVT = PackedOp.getValueType();
5522 unsigned ToBits = OutVT.getScalarSizeInBits();
5523 unsigned FromBits = InVT.getScalarSizeInBits();
5524 do {
5525 FromBits *= 2;
5526 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
5527 SystemZ::VectorBits / FromBits);
5528 PackedOp =
5529 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
5530 } while (FromBits != ToBits);
5531 return PackedOp;
5532}
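A small sketch of the widening loop above (printUnpackSequence is a hypothetical illustration, not part of this file; 128 is assumed to match SystemZ::VectorBits): each iteration doubles the element width and emits one UNPACK_HIGH, so extending i8 elements to i32 takes two unpacks.

#include <cstdio>

// Prints the UNPACK_HIGH result types produced by the loop in
// lowerSIGN_EXTEND_VECTOR_INREG, e.g. FromBits = 8, ToBits = 32 prints
// "unpack_high -> v8i16" then "unpack_high -> v4i32".
void printUnpackSequence(unsigned FromBits, unsigned ToBits) {
  const unsigned VectorBits = 128; // assumed value of SystemZ::VectorBits
  do {
    FromBits *= 2;
    std::printf("unpack_high -> v%ui%u\n", VectorBits / FromBits, FromBits);
  } while (FromBits != ToBits);
}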
5533
5534// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
5535SDValue SystemZTargetLowering::
5536lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5537 SDValue PackedOp = Op.getOperand(0);
5538 SDLoc DL(Op);
5539 EVT OutVT = Op.getValueType();
5540 EVT InVT = PackedOp.getValueType();
5541 unsigned InNumElts = InVT.getVectorNumElements();
5542 unsigned OutNumElts = OutVT.getVectorNumElements();
5543 unsigned NumInPerOut = InNumElts / OutNumElts;
5544
5545 SDValue ZeroVec =
5546 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
5547
5548 SmallVector<int, 16> Mask(InNumElts);
5549 unsigned ZeroVecElt = InNumElts;
5550 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
5551 unsigned MaskElt = PackedElt * NumInPerOut;
5552 unsigned End = MaskElt + NumInPerOut - 1;
5553 for (; MaskElt < End; MaskElt++)
5554 Mask[MaskElt] = ZeroVecElt++;
5555 Mask[MaskElt] = PackedElt;
5556 }
5557 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
5558 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
5559}
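A worked sketch of the mask construction above (buildZeroExtendMask is a hypothetical standalone copy of the loop, not part of this file): mask entries greater than or equal to InNumElts select elements of ZeroVec, so each packed element is preceded by NumInPerOut - 1 zero elements.

#include <vector>

// For InNumElts = 16, OutNumElts = 4 (v16i8 zero-extended in-register to
// v4i32) this returns {16,17,18,0, 19,20,21,1, 22,23,24,2, 25,26,27,3}:
// three bytes of the zero vector followed by the original byte, per i32.
std::vector<int> buildZeroExtendMask(unsigned InNumElts, unsigned OutNumElts) {
  unsigned NumInPerOut = InNumElts / OutNumElts;
  std::vector<int> Mask(InNumElts);
  unsigned ZeroVecElt = InNumElts;
  for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
    unsigned MaskElt = PackedElt * NumInPerOut;
    unsigned End = MaskElt + NumInPerOut - 1;
    for (; MaskElt < End; MaskElt++)
      Mask[MaskElt] = ZeroVecElt++;
    Mask[MaskElt] = PackedElt;
  }
  return Mask;
}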
5560
5561SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
5562 unsigned ByScalar) const {
5563 // Look for cases where a vector shift can use the *_BY_SCALAR form.
5564 SDValue Op0 = Op.getOperand(0);
5565 SDValue Op1 = Op.getOperand(1);
5566 SDLoc DL(Op);
5567 EVT VT = Op.getValueType();
5568 unsigned ElemBitSize = VT.getScalarSizeInBits();
5569
5570 // See whether the shift vector is a splat represented as BUILD_VECTOR.
5571 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
5572 APInt SplatBits, SplatUndef;
5573 unsigned SplatBitSize;
5574 bool HasAnyUndefs;
5575 // Check for constant splats. Use ElemBitSize as the minimum element
5576 // width and reject splats that need wider elements.
5577 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
5578 ElemBitSize, true) &&
5579 SplatBitSize == ElemBitSize) {
5580 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
5581 DL, MVT::i32);
5582 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5583 }
5584 // Check for variable splats.
5585 BitVector UndefElements;
5586 SDValue Splat = BVN->getSplatValue(&UndefElements);
5587 if (Splat) {
5588 // Since i32 is the smallest legal type, we either need a no-op
5589 // or a truncation.
5590 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
5591 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5592 }
5593 }
5594
5595 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
5596 // and the shift amount is directly available in a GPR.
5597 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
5598 if (VSN->isSplat()) {
5599 SDValue VSNOp0 = VSN->getOperand(0);
5600 unsigned Index = VSN->getSplatIndex();
5601 assert(Index < VT.getVectorNumElements() &&
5602 "Splat index should be defined and in first operand");
5603 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5604 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
5605 // Since i32 is the smallest legal type, we either need a no-op
5606 // or a truncation.
5607 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
5608 VSNOp0.getOperand(Index));
5609 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5610 }
5611 }
5612 }
5613
5614 // Otherwise just treat the current form as legal.
5615 return Op;
5616}
5617
5618SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
5619 SelectionDAG &DAG) const {
5620 switch (Op.getOpcode()) {
5621 case ISD::FRAMEADDR:
5622 return lowerFRAMEADDR(Op, DAG);
5623 case ISD::RETURNADDR:
5624 return lowerRETURNADDR(Op, DAG);
5625 case ISD::BR_CC:
5626 return lowerBR_CC(Op, DAG);
5627 case ISD::SELECT_CC:
5628 return lowerSELECT_CC(Op, DAG);
5629 case ISD::SETCC:
5630 return lowerSETCC(Op, DAG);
5631 case ISD::STRICT_FSETCC:
5632 return lowerSTRICT_FSETCC(Op, DAG, false);
5633 case ISD::STRICT_FSETCCS:
5634 return lowerSTRICT_FSETCC(Op, DAG, true);
5635 case ISD::GlobalAddress:
5636 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
5637 case ISD::GlobalTLSAddress:
5638 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
5639 case ISD::BlockAddress:
5640 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
5641 case ISD::JumpTable:
5642 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
5643 case ISD::ConstantPool:
5644 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
5645 case ISD::BITCAST:
5646 return lowerBITCAST(Op, DAG);
5647 case ISD::VASTART:
5648 return lowerVASTART(Op, DAG);
5649 case ISD::VACOPY:
5650 return lowerVACOPY(Op, DAG);
5651 case ISD::DYNAMIC_STACKALLOC:
5652 return lowerDYNAMIC_STACKALLOC(Op, DAG);
5653 case ISD::GET_DYNAMIC_AREA_OFFSET:
5654 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
5655 case ISD::SMUL_LOHI:
5656 return lowerSMUL_LOHI(Op, DAG);
5657 case ISD::UMUL_LOHI:
5658 return lowerUMUL_LOHI(Op, DAG);
5659 case ISD::SDIVREM:
5660 return lowerSDIVREM(Op, DAG);
5661 case ISD::UDIVREM:
5662 return lowerUDIVREM(Op, DAG);
5663 case ISD::SADDO:
5664 case ISD::SSUBO:
5665 case ISD::UADDO:
5666 case ISD::USUBO:
5667 return lowerXALUO(Op, DAG);
5668 case ISD::ADDCARRY:
5669 case ISD::SUBCARRY:
5670 return lowerADDSUBCARRY(Op, DAG);
5671 case ISD::OR:
5672 return lowerOR(Op, DAG);
5673 case ISD::CTPOP:
5674 return lowerCTPOP(Op, DAG);
5675 case ISD::ATOMIC_FENCE:
5676 return lowerATOMIC_FENCE(Op, DAG);
5677 case ISD::ATOMIC_SWAP:
5678 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
5679 case ISD::ATOMIC_STORE:
5680 return lowerATOMIC_STORE(Op, DAG);
5681 case ISD::ATOMIC_LOAD:
5682 return lowerATOMIC_LOAD(Op, DAG);
5683 case ISD::ATOMIC_LOAD_ADD:
5684 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
5685 case ISD::ATOMIC_LOAD_SUB:
5686 return lowerATOMIC_LOAD_SUB(Op, DAG);
5687 case ISD::ATOMIC_LOAD_AND:
5688 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
5689 case ISD::ATOMIC_LOAD_OR:
5690 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
5691 case ISD::ATOMIC_LOAD_XOR:
5692 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
5693 case ISD::ATOMIC_LOAD_NAND:
5694 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
5695 case ISD::ATOMIC_LOAD_MIN:
5696 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
5697 case ISD::ATOMIC_LOAD_MAX:
5698 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
5699 case ISD::ATOMIC_LOAD_UMIN:
5700 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
5701 case ISD::ATOMIC_LOAD_UMAX:
5702 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
5703 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
5704 return lowerATOMIC_CMP_SWAP(Op, DAG);
5705 case ISD::STACKSAVE:
5706 return lowerSTACKSAVE(Op, DAG);
5707 case ISD::STACKRESTORE:
5708 return lowerSTACKRESTORE(Op, DAG);
5709 case ISD::PREFETCH:
5710 return lowerPREFETCH(Op, DAG);
5711 case ISD::INTRINSIC_W_CHAIN:
5712 return lowerINTRINSIC_W_CHAIN(Op, DAG);
5713 case ISD::INTRINSIC_WO_CHAIN:
5714 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
5715 case ISD::BUILD_VECTOR:
5716 return lowerBUILD_VECTOR(Op, DAG);
5717 case ISD::VECTOR_SHUFFLE:
5718 return lowerVECTOR_SHUFFLE(Op, DAG);
5719 case ISD::SCALAR_TO_VECTOR:
5720 return lowerSCALAR_TO_VECTOR(Op, DAG);
5721 case ISD::INSERT_VECTOR_ELT:
5722 return lowerINSERT_VECTOR_ELT(Op, DAG);
5723 case ISD::EXTRACT_VECTOR_ELT:
5724 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
5725 case ISD::SIGN_EXTEND_VECTOR_INREG:
5726 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
5727 case ISD::ZERO_EXTEND_VECTOR_INREG:
5728 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
5729 case ISD::SHL:
5730 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
5731 case ISD::SRL:
5732 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
5733 case ISD::SRA:
5734 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
5735 default:
5736 llvm_unreachable("Unexpected node to lower");
5737 }
5738}
5739
5740// Lower operations with invalid operand or result types (currently used
5741// only for 128-bit integer types).
5742void
5743SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
5744 SmallVectorImpl<SDValue> &Results,
5745 SelectionDAG &DAG) const {
5746 switch (N->getOpcode()) {
5747 case ISD::ATOMIC_LOAD: {
5748 SDLoc DL(N);
5749 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
5750 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
5751 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5752 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
5753 DL, Tys, Ops, MVT::i128, MMO);
5754 Results.push_back(lowerGR128ToI128(DAG, Res));
5755 Results.push_back(Res.getValue(1));
5756 break;
5757 }
5758 case ISD::ATOMIC_STORE: {
5759 SDLoc DL(N);
5760 SDVTList Tys = DAG.getVTList(MVT::Other);
5761 SDValue Ops[] = { N->getOperand(0),
5762 lowerI128ToGR128(DAG, N->getOperand(2)),
5763 N->getOperand(1) };
5764 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5765 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
5766 DL, Tys, Ops, MVT::i128, MMO);
5767 // We have to enforce sequential consistency by performing a
5768 // serialization operation after the store.
5769 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
5770 AtomicOrdering::SequentiallyConsistent)
5771 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
5772 MVT::Other, Res), 0);
5773 Results.push_back(Res);
5774 break;
5775 }
5776 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
5777 SDLoc DL(N);
5778 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
5779 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
5780 lowerI128ToGR128(DAG, N->getOperand(2)),
5781 lowerI128ToGR128(DAG, N->getOperand(3)) };
5782 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5783 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
5784 DL, Tys, Ops, MVT::i128, MMO);
5785 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
5786 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
5787 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
5788 Results.push_back(lowerGR128ToI128(DAG, Res));
5789 Results.push_back(Success);
5790 Results.push_back(Res.getValue(2));
5791 break;
5792 }
5793 case ISD::BITCAST: {
5794 SDValue Src = N->getOperand(0);
5795 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
5796 !useSoftFloat()) {
5797 SDLoc DL(N);
5798 SDValue Lo, Hi;
5799 if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) {
5800 SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src);
5801 Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
5802 DAG.getConstant(1, DL, MVT::i32));
5803 Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
5804 DAG.getConstant(0, DL, MVT::i32));
5805 } else {
5806 assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass &&
5807 "Unrecognized register class for f128.");
5808 SDValue LoFP = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
5809 DL, MVT::f64, Src);
5810 SDValue HiFP = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
5811 DL, MVT::f64, Src);
5812 Lo = DAG.getNode(ISD::BITCAST, DL, MVT::i64, LoFP);
5813 Hi = DAG.getNode(ISD::BITCAST, DL, MVT::i64, HiFP);
5814 }
5815 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi));
5816 }
5817 break;
5818 }
5819 default:
5820 llvm_unreachable("Unexpected node to lower");
5821 }
5822}
5823
5824void
5825SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
5826 SmallVectorImpl<SDValue> &Results,
5827 SelectionDAG &DAG) const {
5828 return LowerOperationWrapper(N, Results, DAG);
5829}
5830
5831const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
5832#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
5833 switch ((SystemZISD::NodeType)Opcode) {
5834 case SystemZISD::FIRST_NUMBER: break;
5835 OPCODE(RET_FLAG);
5836 OPCODE(CALL);
5837 OPCODE(SIBCALL);
5838 OPCODE(TLS_GDCALL);
5839 OPCODE(TLS_LDCALL);
5840 OPCODE(PCREL_WRAPPER);
5841 OPCODE(PCREL_OFFSET);
5842 OPCODE(ICMP);
5843 OPCODE(FCMP);
5844 OPCODE(STRICT_FCMP);
5845 OPCODE(STRICT_FCMPS);
5846 OPCODE(TM);
5847 OPCODE(BR_CCMASK);
5848 OPCODE(SELECT_CCMASK);
5849 OPCODE(ADJDYNALLOC);
5850 OPCODE(PROBED_ALLOCA);
5851 OPCODE(POPCNT);
5852 OPCODE(SMUL_LOHI);
5853 OPCODE(UMUL_LOHI);
5854 OPCODE(SDIVREM);
5855 OPCODE(UDIVREM);
5856 OPCODE(SADDO);
5857 OPCODE(SSUBO);
5858 OPCODE(UADDO);
5859 OPCODE(USUBO);
5860 OPCODE(ADDCARRY);
5861 OPCODE(SUBCARRY);
5862 OPCODE(GET_CCMASK);
5863 OPCODE(MVC);
5864 OPCODE(NC);
5865 OPCODE(OC);
5866 OPCODE(XC);
5867 OPCODE(CLC);
5868 OPCODE(MEMSET_MVC);
5869 OPCODE(STPCPY);
5870 OPCODE(STRCMP);
5871 OPCODE(SEARCH_STRING);
5872 OPCODE(IPM);
5873 OPCODE(MEMBARRIER);
5874 OPCODE(TBEGIN);
5875 OPCODE(TBEGIN_NOFLOAT);
5876 OPCODE(TEND);
5877 OPCODE(BYTE_MASK);
5878 OPCODE(ROTATE_MASK);
5879 OPCODE(REPLICATE);
5880 OPCODE(JOIN_DWORDS);
5881 OPCODE(SPLAT);
5882 OPCODE(MERGE_HIGH);
5883 OPCODE(MERGE_LOW);
5884 OPCODE(SHL_DOUBLE);
5885 OPCODE(PERMUTE_DWORDS);
5886 OPCODE(PERMUTE);
5887 OPCODE(PACK);
5888 OPCODE(PACKS_CC);
5889 OPCODE(PACKLS_CC);
5890 OPCODE(UNPACK_HIGH);
5891 OPCODE(UNPACKL_HIGH);
5892 OPCODE(UNPACK_LOW);
5893 OPCODE(UNPACKL_LOW);
5894 OPCODE(VSHL_BY_SCALAR);
5895 OPCODE(VSRL_BY_SCALAR);
5896 OPCODE(VSRA_BY_SCALAR);
5897 OPCODE(VSUM);
5898 OPCODE(VICMPE);
5899 OPCODE(VICMPH);
5900 OPCODE(VICMPHL);
5901 OPCODE(VICMPES);
5902 OPCODE(VICMPHS);
5903 OPCODE(VICMPHLS);
5904 OPCODE(VFCMPE);
5905 OPCODE(STRICT_VFCMPE);
5906 OPCODE(STRICT_VFCMPES);
5907 OPCODE(VFCMPH);
5908 OPCODE(STRICT_VFCMPH);
5909 OPCODE(STRICT_VFCMPHS);
5910 OPCODE(VFCMPHE);
5911 OPCODE(STRICT_VFCMPHE);
5912 OPCODE(STRICT_VFCMPHES);
5913 OPCODE(VFCMPES);
5914 OPCODE(VFCMPHS);
5915 OPCODE(VFCMPHES);
5916 OPCODE(VFTCI);
5917 OPCODE(VEXTEND);
5918 OPCODE(STRICT_VEXTEND);
5919 OPCODE(VROUND);
5920 OPCODE(STRICT_VROUND);
5921 OPCODE(VTM);
5922 OPCODE(VFAE_CC);
5923 OPCODE(VFAEZ_CC);
5924 OPCODE(VFEE_CC);
5925 OPCODE(VFEEZ_CC);
5926 OPCODE(VFENE_CC);
5927 OPCODE(VFENEZ_CC);
5928 OPCODE(VISTR_CC);
5929 OPCODE(VSTRC_CC);
5930 OPCODE(VSTRCZ_CC);
5931 OPCODE(VSTRS_CC);
5932 OPCODE(VSTRSZ_CC);
5933 OPCODE(TDC);
5934 OPCODE(ATOMIC_SWAPW);
5935 OPCODE(ATOMIC_LOADW_ADD);
5936 OPCODE(ATOMIC_LOADW_SUB);
5937 OPCODE(ATOMIC_LOADW_AND);
5938 OPCODE(ATOMIC_LOADW_OR);
5939 OPCODE(ATOMIC_LOADW_XOR);
5940 OPCODE(ATOMIC_LOADW_NAND);
5941 OPCODE(ATOMIC_LOADW_MIN);
5942 OPCODE(ATOMIC_LOADW_MAX);
5943 OPCODE(ATOMIC_LOADW_UMIN);
5944 OPCODE(ATOMIC_LOADW_UMAX);
5945 OPCODE(ATOMIC_CMP_SWAPW);
5946 OPCODE(ATOMIC_CMP_SWAP);
5947 OPCODE(ATOMIC_LOAD_128);
5948 OPCODE(ATOMIC_STORE_128);
5949 OPCODE(ATOMIC_CMP_SWAP_128);
5950 OPCODE(LRV);
5951 OPCODE(STRV);
5952 OPCODE(VLER);
5953 OPCODE(VSTER);
5954 OPCODE(PREFETCH);
5955 }
5956 return nullptr;
5957#undef OPCODE
5958}
5959
5960// Return true if VT is a vector whose elements are a whole number of bytes
5961// in width. Also check for presence of vector support.
5962bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
5963 if (!Subtarget.hasVector())
5964 return false;
5965
5966 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
5967}
5968
5969// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
5970// producing a result of type ResVT. Op is a possibly bitcast version
5971// of the input vector and Index is the index (based on type VecVT) that
5972// should be extracted. Return the new extraction if a simplification
5973// was possible or if Force is true.
5974SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
5975 EVT VecVT, SDValue Op,
5976 unsigned Index,
5977 DAGCombinerInfo &DCI,
5978 bool Force) const {
5979 SelectionDAG &DAG = DCI.DAG;
5980
5981 // The number of bytes being extracted.
5982 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
5983
5984 for (;;) {
5985 unsigned Opcode = Op.getOpcode();
5986 if (Opcode == ISD::BITCAST)
5987 // Look through bitcasts.
5988 Op = Op.getOperand(0);
5989 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
5990 canTreatAsByteVector(Op.getValueType())) {
5991 // Get a VPERM-like permute mask and see whether the bytes covered
5992 // by the extracted element are a contiguous sequence from one
5993 // source operand.
5994 SmallVector<int, SystemZ::VectorBytes> Bytes;
5995 if (!getVPermMask(Op, Bytes))
5996 break;
5997 int First;
5998 if (!getShuffleInput(Bytes, Index * BytesPerElement,
5999 BytesPerElement, First))
6000 break;
6001 if (First < 0)
6002 return DAG.getUNDEF(ResVT);
6003 // Make sure the contiguous sequence starts at a multiple of the
6004 // original element size.
6005 unsigned Byte = unsigned(First) % Bytes.size();
6006 if (Byte % BytesPerElement != 0)
6007 break;
6008 // We can get the extracted value directly from an input.
6009 Index = Byte / BytesPerElement;
6010 Op = Op.getOperand(unsigned(First) / Bytes.size());
6011 Force = true;
6012 } else if (Opcode == ISD::BUILD_VECTOR &&
6013 canTreatAsByteVector(Op.getValueType())) {
6014 // We can only optimize this case if the BUILD_VECTOR elements are
6015 // at least as wide as the extracted value.
6016 EVT OpVT = Op.getValueType();
6017 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6018 if (OpBytesPerElement < BytesPerElement)
6019 break;
6020 // Make sure that the least-significant bit of the extracted value
6021 // is the least significant bit of an input.
6022 unsigned End = (Index + 1) * BytesPerElement;
6023 if (End % OpBytesPerElement != 0)
6024 break;
6025 // We're extracting the low part of one operand of the BUILD_VECTOR.
6026 Op = Op.getOperand(End / OpBytesPerElement - 1);
6027 if (!Op.getValueType().isInteger()) {
6028 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
6029 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
6030 DCI.AddToWorklist(Op.getNode());
6031 }
6032 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
6033 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
6034 if (VT != ResVT) {
6035 DCI.AddToWorklist(Op.getNode());
6036 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
6037 }
6038 return Op;
6039 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6040 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
6041 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6042 canTreatAsByteVector(Op.getValueType()) &&
6043 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
6044 // Make sure that only the unextended bits are significant.
6045 EVT ExtVT = Op.getValueType();
6046 EVT OpVT = Op.getOperand(0).getValueType();
6047 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
6048 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6049 unsigned Byte = Index * BytesPerElement;
6050 unsigned SubByte = Byte % ExtBytesPerElement;
6051 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
6052 if (SubByte < MinSubByte ||
6053 SubByte + BytesPerElement > ExtBytesPerElement)
6054 break;
6055 // Get the byte offset of the unextended element
6056 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
6057 // ...then add the byte offset relative to that element.
6058 Byte += SubByte - MinSubByte;
6059 if (Byte % BytesPerElement != 0)
6060 break;
6061 Op = Op.getOperand(0);
6062 Index = Byte / BytesPerElement;
6063 Force = true;
6064 } else
6065 break;
6066 }
6067 if (Force) {
6068 if (Op.getValueType() != VecVT) {
6069 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
6070 DCI.AddToWorklist(Op.getNode());
6071 }
6072 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
6073 DAG.getConstant(Index, DL, MVT::i32));
6074 }
6075 return SDValue();
6076}
6077
6078// Optimize vector operations in scalar value Op on the basis that Op
6079// is truncated to TruncVT.
6080SDValue SystemZTargetLowering::combineTruncateExtract(
6081 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
6082 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
6083 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
6084 // of type TruncVT.
6085 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6086 TruncVT.getSizeInBits() % 8 == 0) {
6087 SDValue Vec = Op.getOperand(0);
6088 EVT VecVT = Vec.getValueType();
6089 if (canTreatAsByteVector(VecVT)) {
6090 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
6091 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6092 unsigned TruncBytes = TruncVT.getStoreSize();
6093 if (BytesPerElement % TruncBytes == 0) {
6094 // Calculate the value of Y' in the above description. We are
6095 // splitting the original elements into Scale equal-sized pieces
6096 // and for truncation purposes want the last (least-significant)
6097 // of these pieces for IndexN. This is easiest to do by calculating
6098 // the start index of the following element and then subtracting 1.
6099 unsigned Scale = BytesPerElement / TruncBytes;
6100 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
6101
6102 // Defer the creation of the bitcast from X to combineExtract,
6103 // which might be able to optimize the extraction.
6104 VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
6105 VecVT.getStoreSize() / TruncBytes);
6106 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
6107 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
6108 }
6109 }
6110 }
6111 }
6112 return SDValue();
6113}
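A minimal sketch of the Y' calculation described in the comment above (remapTruncatedExtractIndex is a hypothetical helper, not part of this file): the least-significant piece of the original element is the last of the Scale sub-elements it splits into.

// Example: truncating element 1 of a v4i32 to i8 gives Scale = 4 and a new
// index of 7, i.e. byte element 7 of the bitcast v16i8 (big-endian, so the
// last byte of the element holds its least-significant bits).
unsigned remapTruncatedExtractIndex(unsigned Index, unsigned BytesPerElement,
                                    unsigned TruncBytes) {
  unsigned Scale = BytesPerElement / TruncBytes;
  return (Index + 1) * Scale - 1;
}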
6114
6115SDValue SystemZTargetLowering::combineZERO_EXTEND(
6116 SDNode *N, DAGCombinerInfo &DCI) const {
6117 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
6118 SelectionDAG &DAG = DCI.DAG;
6119 SDValue N0 = N->getOperand(0);
6120 EVT VT = N->getValueType(0);
6121 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
6122 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
6123 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6124 if (TrueOp && FalseOp) {
6125 SDLoc DL(N0);
6126 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
6127 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
6128 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
6129 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
6130 // If N0 has multiple uses, change other uses as well.
6131 if (!N0.hasOneUse()) {
6132 SDValue TruncSelect =
6133 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
6134 DCI.CombineTo(N0.getNode(), TruncSelect);
6135 }
6136 return NewSelect;
6137 }
6138 }
6139 return SDValue();
6140}
6141
6142SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
6143 SDNode *N, DAGCombinerInfo &DCI) const {
6144 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
6145 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
6146 // into (select_cc LHS, RHS, -1, 0, COND)
6147 SelectionDAG &DAG = DCI.DAG;
6148 SDValue N0 = N->getOperand(0);
6149 EVT VT = N->getValueType(0);
6150 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
6151 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
6152 N0 = N0.getOperand(0);
6153 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
6154 SDLoc DL(N0);
6155 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
6156 DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
6157 N0.getOperand(2) };
6158 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
6159 }
6160 return SDValue();
6161}
6162
6163SDValue SystemZTargetLowering::combineSIGN_EXTEND(
6164 SDNode *N, DAGCombinerInfo &DCI) const {
6165 // Convert (sext (ashr (shl X, C1), C2)) to
6166 // (ashr (shl (anyext X), C1'), C2'), since wider shifts are as
6167 // cheap as narrower ones.
6168 SelectionDAG &DAG = DCI.DAG;
6169 SDValue N0 = N->getOperand(0);
6170 EVT VT = N->getValueType(0);
6171 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
6172 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6173 SDValue Inner = N0.getOperand(0);
6174 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
6175 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
6176 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
6177 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
6178 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
6179 EVT ShiftVT = N0.getOperand(1).getValueType();
6180 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
6181 Inner.getOperand(0));
6182 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
6183 DAG.getConstant(NewShlAmt, SDLoc(Inner),
6184 ShiftVT));
6185 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
6186 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
6187 }
6188 }
6189 }
6190 return SDValue();
6191}
6192
6193SDValue SystemZTargetLowering::combineMERGE(
6194 SDNode *N, DAGCombinerInfo &DCI) const {
6195 SelectionDAG &DAG = DCI.DAG;
6196 unsigned Opcode = N->getOpcode();
6197 SDValue Op0 = N->getOperand(0);
6198 SDValue Op1 = N->getOperand(1);
6199 if (Op0.getOpcode() == ISD::BITCAST)
6200 Op0 = Op0.getOperand(0);
6201 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
6202 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
6203 // for v4f32.
6204 if (Op1 == N->getOperand(0))
6205 return Op1;
6206 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
6207 EVT VT = Op1.getValueType();
6208 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
6209 if (ElemBytes <= 4) {
6210 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
6211 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
6212 EVT InVT = VT.changeVectorElementTypeToInteger();
6213 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
6214 SystemZ::VectorBytes / ElemBytes / 2);
6215 if (VT != InVT) {
6216 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
6217 DCI.AddToWorklist(Op1.getNode());
6218 }
6219 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
6220 DCI.AddToWorklist(Op.getNode());
6221 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
6222 }
6223 }
6224 return SDValue();
6225}
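A sketch of the unpack result type computed above (unpackResultType is a hypothetical helper, not part of this file; 16 is assumed to match SystemZ::VectorBytes): the element width doubles while the element count halves.

// E.g. ElemBytes = 1 gives v8i16, ElemBytes = 2 gives v4i32 and
// ElemBytes = 4 gives v2i64 as the unpack's output type.
void unpackResultType(unsigned ElemBytes, unsigned &OutElemBits,
                      unsigned &OutNumElts) {
  const unsigned VectorBytes = 16; // assumed value of SystemZ::VectorBytes
  OutElemBits = ElemBytes * 16;    // ElemBytes * 2 bytes, expressed in bits
  OutNumElts = VectorBytes / ElemBytes / 2;
}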
6226
6227SDValue SystemZTargetLowering::combineLOAD(
6228 SDNode *N, DAGCombinerInfo &DCI) const {
6229 SelectionDAG &DAG = DCI.DAG;
6230 EVT LdVT = N->getValueType(0);
6231 if (LdVT.isVector() || LdVT.isInteger())
6232 return SDValue();
6233 // Transform a scalar load that is REPLICATEd and also has other use(s)
6234 // so that those other use(s) read the first element of the REPLICATE
6235 // instead of the load. Otherwise instruction selection will not produce
6236 // a VLREP. Avoid extracting to a GPR, so only do this for floating-point
6237 // loads.
6238
6239 SDValue Replicate;
6240 SmallVector<SDNode*, 8> OtherUses;
6241 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6242 UI != UE; ++UI) {
6243 if (UI->getOpcode() == SystemZISD::REPLICATE) {
6244 if (Replicate)
6245 return SDValue(); // Should never happen
6246 Replicate = SDValue(*UI, 0);
6247 }
6248 else if (UI.getUse().getResNo() == 0)
6249 OtherUses.push_back(*UI);
6250 }
6251 if (!Replicate || OtherUses.empty())
6252 return SDValue();
6253
6254 SDLoc DL(N);
6255 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
6256 Replicate, DAG.getConstant(0, DL, MVT::i32));
6257 // Update uses of the loaded Value while preserving old chains.
6258 for (SDNode *U : OtherUses) {
6259 SmallVector<SDValue, 8> Ops;
6260 for (SDValue Op : U->ops())
6261 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
6262 DAG.UpdateNodeOperands(U, Ops);
6263 }
6264 return SDValue(N, 0);
6265}
6266
6267bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
6268 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
6269 return true;
6270 if (Subtarget.hasVectorEnhancements2())
6271 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64)
6272 return true;
6273 return false;
6274}
6275
6276static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
6277 if (!VT.isVector() || !VT.isSimple() ||
6278 VT.getSizeInBits() != 128 ||
6279 VT.getScalarSizeInBits() % 8 != 0)
6280 return false;
6281
6282 unsigned NumElts = VT.getVectorNumElements();
6283 for (unsigned i = 0; i < NumElts; ++i) {
6284 if (M[i] < 0) continue; // ignore UNDEF indices
6285 if ((unsigned) M[i] != NumElts - 1 - i)
6286 return false;
6287 }
6288
6289 return true;
6290}
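For reference, the only mask shape isVectorElementSwap accepts (undef entries aside) is a full reversal of the element order; buildElementSwapMask below is a hypothetical generator for such masks, not part of this file.

#include <vector>

// Builds the element-reversal mask matched above, e.g. {3,2,1,0} for a
// v4i32 or {7,6,5,4,3,2,1,0} for a v8i16.
std::vector<int> buildElementSwapMask(unsigned NumElts) {
  std::vector<int> Mask(NumElts);
  for (unsigned i = 0; i < NumElts; ++i)
    Mask[i] = (int)(NumElts - 1 - i);
  return Mask;
}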
6291
6292SDValue SystemZTargetLowering::combineSTORE(
6293 SDNode *N, DAGCombinerInfo &DCI) const {
6294 SelectionDAG &DAG = DCI.DAG;
6295 auto *SN = cast<StoreSDNode>(N);
6296 auto &Op1 = N->getOperand(1);
6297 EVT MemVT = SN->getMemoryVT();
6298 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
6299 // for the extraction to be done on a vMiN value, so that we can use VSTE.
6300 // If X has wider elements then convert it to:
6301 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
6302 if (MemVT.isInteger() && SN->isTruncatingStore()) {
6303 if (SDValue Value =
6304 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
6305 DCI.AddToWorklist(Value.getNode());
6306
6307 // Rewrite the store with the new form of stored value.
6308 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
6309 SN->getBasePtr(), SN->getMemoryVT(),
6310 SN->getMemOperand());
6311 }
6312 }
6313 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
6314 if (!SN->isTruncatingStore() &&
6315 Op1.getOpcode() == ISD::BSWAP &&
6316 Op1.getNode()->hasOneUse() &&
6317 canLoadStoreByteSwapped(Op1.getValueType())) {
6318
6319 SDValue BSwapOp = Op1.getOperand(0);
6320
6321 if (BSwapOp.getValueType() == MVT::i16)
6322 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
6323
6324 SDValue Ops[] = {
6325 N->getOperand(0), BSwapOp, N->getOperand(2)
6326 };
6327
6328 return
6329 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
6330 Ops, MemVT, SN->getMemOperand());
6331 }
6332 // Combine STORE (element-swap) into VSTER
6333 if (!SN->isTruncatingStore() &&
6334 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
6335 Op1.getNode()->hasOneUse() &&
6336 Subtarget.hasVectorEnhancements2()) {
6337 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
6338 ArrayRef<int> ShuffleMask = SVN->getMask();
6339 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
6340 SDValue Ops[] = {
6341 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
6342 };
6343
6344 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
6345 DAG.getVTList(MVT::Other),
6346 Ops, MemVT, SN->getMemOperand());
6347 }
6348 }
6349
6350 return SDValue();
6351}
6352
6353SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
6354 SDNode *N, DAGCombinerInfo &DCI) const {
6355 SelectionDAG &DAG = DCI.DAG;
6356 // Combine element-swap (LOAD) into VLER
6357 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
6358 N->getOperand(0).hasOneUse() &&
6359 Subtarget.hasVectorEnhancements2()) {
6360 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
6361 ArrayRef<int> ShuffleMask = SVN->getMask();
6362 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
6363 SDValue Load = N->getOperand(0);
6364 LoadSDNode *LD = cast<LoadSDNode>(Load);
6365
6366 // Create the element-swapping load.
6367 SDValue Ops[] = {
6368 LD->getChain(), // Chain
6369 LD->getBasePtr() // Ptr
6370 };
6371 SDValue ESLoad =
6372 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
6373 DAG.getVTList(LD->getValueType(0), MVT::Other),
6374 Ops, LD->getMemoryVT(), LD->getMemOperand());
6375
6376 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
6377 // by the load dead.
6378 DCI.CombineTo(N, ESLoad);
6379
6380 // Next, combine the load away; we give it a bogus result value but a real
6381 // chain result. The result value is dead because the shuffle is dead.
6382 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
6383
6384 // Return N so it doesn't get rechecked!
6385 return SDValue(N, 0);
6386 }
6387 }
6388
6389 return SDValue();
6390}
6391
6392SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
6393 SDNode *N, DAGCombinerInfo &DCI) const {
6394 SelectionDAG &DAG = DCI.DAG;
6395
6396 if (!Subtarget.hasVector())
6397 return SDValue();
6398
6399 // Look through bitcasts that retain the number of vector elements.
6400 SDValue Op = N->getOperand(0);
6401 if (Op.getOpcode() == ISD::BITCAST &&
6402 Op.getValueType().isVector() &&
6403 Op.getOperand(0).getValueType().isVector() &&
6404 Op.getValueType().getVectorNumElements() ==
6405 Op.getOperand(0).getValueType().getVectorNumElements())
6406 Op = Op.getOperand(0);
6407
6408 // Pull BSWAP out of a vector extraction.
6409 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
6410 EVT VecVT = Op.getValueType();
6411 EVT EltVT = VecVT.getVectorElementType();
6412 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
6413 Op.getOperand(0), N->getOperand(1));
6414 DCI.AddToWorklist(Op.getNode());
6415 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
6416 if (EltVT != N->getValueType(0)) {
6417 DCI.AddToWorklist(Op.getNode());
6418 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
6419 }
6420 return Op;
6421 }
6422
6423 // Try to simplify a vector extraction.
6424 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
6425 SDValue Op0 = N->getOperand(0);
6426 EVT VecVT = Op0.getValueType();
6427 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
6428 IndexN->getZExtValue(), DCI, false);
6429 }
6430 return SDValue();
6431}
6432
6433SDValue SystemZTargetLowering::combineJOIN_DWORDS(
6434 SDNode *N, DAGCombinerInfo &DCI) const {
6435 SelectionDAG &DAG = DCI.DAG;
6436 // (join_dwords X, X) == (replicate X)
6437 if (N->getOperand(0) == N->getOperand(1))
6438 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
6439 N->getOperand(0));
6440 return SDValue();
6441}
6442
6443static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
6444 SDValue Chain1 = N1->getOperand(0);
6445 SDValue Chain2 = N2->getOperand(0);
6446
6447 // Trivial case: both nodes take the same chain.
6448 if (Chain1 == Chain2)
6449 return Chain1;
6450
6451 // FIXME - we could handle more complex cases via TokenFactor,
6452 // assuming we can verify that this would not create a cycle.
6453 return SDValue();
6454}
6455
6456SDValue SystemZTargetLowering::combineFP_ROUND(
6457 SDNode *N, DAGCombinerInfo &DCI) const {
6458
6459 if (!Subtarget.hasVector())
6460 return SDValue();
6461
6462 // (fpround (extract_vector_elt X 0))
6463 // (fpround (extract_vector_elt X 1)) ->
6464 // (extract_vector_elt (VROUND X) 0)
6465 // (extract_vector_elt (VROUND X) 2)
6466 //
6467 // This is a special case since the target doesn't really support v2f32s.
6468 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
6469 SelectionDAG &DAG = DCI.DAG;
6470 SDValue Op0 = N->getOperand(OpNo);
6471 if (N->getValueType(0) == MVT::f32 &&
6472 Op0.hasOneUse() &&
6473 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6474 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
6475 Op0.getOperand(1).getOpcode() == ISD::Constant &&
6476 cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
6477 SDValue Vec = Op0.getOperand(0);
6478 for (auto *U : Vec->uses()) {
6479 if (U != Op0.getNode() &&
6480 U->hasOneUse() &&
6481 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6482 U->getOperand(0) == Vec &&
6483 U->getOperand(1).getOpcode() == ISD::Constant &&
6484 cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
6485 SDValue OtherRound = SDValue(*U->use_begin(), 0);
6486 if (OtherRound.getOpcode() == N->getOpcode() &&
6487 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
6488 OtherRound.getValueType() == MVT::f32) {
6489 SDValue VRound, Chain;
6490 if (N->isStrictFPOpcode()) {
6491 Chain = MergeInputChains(N, OtherRound.getNode());
6492 if (!Chain)
6493 continue;
6494 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
6495 {MVT::v4f32, MVT::Other}, {Chain, Vec});
6496 Chain = VRound.getValue(1);
6497 } else
6498 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
6499 MVT::v4f32, Vec);
6500 DCI.AddToWorklist(VRound.getNode());
6501 SDValue Extract1 =
6502 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
6503 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
6504 DCI.AddToWorklist(Extract1.getNode());
6505 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
6506 if (Chain)
6507 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
6508 SDValue Extract0 =
6509 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
6510 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
6511 if (Chain)
6512 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
6513 N->getVTList(), Extract0, Chain);
6514 return Extract0;
6515 }
6516 }
6517 }
6518 }
6519 return SDValue();
6520}
6521
6522SDValue SystemZTargetLowering::combineFP_EXTEND(
6523 SDNode *N, DAGCombinerInfo &DCI) const {
6524
6525 if (!Subtarget.hasVector())
6526 return SDValue();
6527
6528 // (fpextend (extract_vector_elt X 0))
6529 // (fpextend (extract_vector_elt X 2)) ->
6530 // (extract_vector_elt (VEXTEND X) 0)
6531 // (extract_vector_elt (VEXTEND X) 1)
6532 //
6533 // This is a special case since the target doesn't really support v2f32s.
6534 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
6535 SelectionDAG &DAG = DCI.DAG;
6536 SDValue Op0 = N->getOperand(OpNo);
6537 if (N->getValueType(0) == MVT::f64 &&
6538 Op0.hasOneUse() &&
6539 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6540 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
6541 Op0.getOperand(1).getOpcode() == ISD::Constant &&
6542 cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
6543 SDValue Vec = Op0.getOperand(0);
6544 for (auto *U : Vec->uses()) {
6545 if (U != Op0.getNode() &&
6546 U->hasOneUse() &&
6547 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6548 U->getOperand(0) == Vec &&
6549 U->getOperand(1).getOpcode() == ISD::Constant &&
6550 cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
6551 SDValue OtherExtend = SDValue(*U->use_begin(), 0);
6552 if (OtherExtend.getOpcode() == N->getOpcode() &&
6553 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
6554 OtherExtend.getValueType() == MVT::f64) {
6555 SDValue VExtend, Chain;
6556 if (N->isStrictFPOpcode()) {
6557 Chain = MergeInputChains(N, OtherExtend.getNode());
6558 if (!Chain)
6559 continue;
6560 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
6561 {MVT::v2f64, MVT::Other}, {Chain, Vec});
6562 Chain = VExtend.getValue(1);
6563 } else
6564 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
6565 MVT::v2f64, Vec);
6566 DCI.AddToWorklist(VExtend.getNode());
6567 SDValue Extract1 =
6568 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
6569 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
6570 DCI.AddToWorklist(Extract1.getNode());
6571 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
6572 if (Chain)
6573 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
6574 SDValue Extract0 =
6575 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
6576 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
6577 if (Chain)
6578 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
6579 N->getVTList(), Extract0, Chain);
6580 return Extract0;
6581 }
6582 }
6583 }
6584 }
6585 return SDValue();
6586}
6587
6588SDValue SystemZTargetLowering::combineINT_TO_FP(
6589 SDNode *N, DAGCombinerInfo &DCI) const {
6590 if (DCI.Level != BeforeLegalizeTypes)
6591 return SDValue();
6592 unsigned Opcode = N->getOpcode();
6593 EVT OutVT = N->getValueType(0);
6594 SelectionDAG &DAG = DCI.DAG;
6595 SDValue Op = N->getOperand(0);
6596 unsigned OutScalarBits = OutVT.getScalarSizeInBits();
6597 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
6598
6599 // Insert an extension before type-legalization to avoid scalarization, e.g.:
6600 // v2f64 = uint_to_fp v2i16
6601 // =>
6602 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
6603 if (OutVT.isVector() && OutScalarBits > InScalarBits) {
6604 MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(OutVT.getScalarSizeInBits()),
6605 OutVT.getVectorNumElements());
6606 unsigned ExtOpcode =
6607 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
6608 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
6609 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
6610 }
6611 return SDValue();
6612}
6613
6614SDValue SystemZTargetLowering::combineBSWAP(
6615 SDNode *N, DAGCombinerInfo &DCI) const {
6616 SelectionDAG &DAG = DCI.DAG;
6617 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
6618 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
6619 N->getOperand(0).hasOneUse() &&
6620 canLoadStoreByteSwapped(N->getValueType(0))) {
6621 SDValue Load = N->getOperand(0);
6622 LoadSDNode *LD = cast<LoadSDNode>(Load);
6623
6624 // Create the byte-swapping load.
6625 SDValue Ops[] = {
6626 LD->getChain(), // Chain
6627 LD->getBasePtr() // Ptr
6628 };
6629 EVT LoadVT = N->getValueType(0);
6630 if (LoadVT == MVT::i16)
6631 LoadVT = MVT::i32;
6632 SDValue BSLoad =
6633 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
6634 DAG.getVTList(LoadVT, MVT::Other),
6635 Ops, LD->getMemoryVT(), LD->getMemOperand());
6636
6637 // If this is an i16 load, insert the truncate.
6638 SDValue ResVal = BSLoad;
6639 if (N->getValueType(0) == MVT::i16)
6640 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
6641
6642 // First, combine the bswap away. This makes the value produced by the
6643 // load dead.
6644 DCI.CombineTo(N, ResVal);
6645
6646 // Next, combine the load away; we give it a bogus result value but a real
6647 // chain result. The result value is dead because the bswap is dead.
6648 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
6649
6650 // Return N so it doesn't get rechecked!
6651 return SDValue(N, 0);
6652 }
6653
6654 // Look through bitcasts that retain the number of vector elements.
6655 SDValue Op = N->getOperand(0);
6656 if (Op.getOpcode() == ISD::BITCAST &&
6657 Op.getValueType().isVector() &&
6658 Op.getOperand(0).getValueType().isVector() &&
6659 Op.getValueType().getVectorNumElements() ==
6660 Op.getOperand(0).getValueType().getVectorNumElements())
6661 Op = Op.getOperand(0);
6662
6663 // Push BSWAP into a vector insertion if at least one side then simplifies.
6664 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
6665 SDValue Vec = Op.getOperand(0);
6666 SDValue Elt = Op.getOperand(1);
6667 SDValue Idx = Op.getOperand(2);
6668
6669 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
6670 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
6671 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
6672 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
6673 (canLoadStoreByteSwapped(N->getValueType(0)) &&
6674 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
6675 EVT VecVT = N->getValueType(0);
6676 EVT EltVT = N->getValueType(0).getVectorElementType();
6677 if (VecVT != Vec.getValueType()) {
6678 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
6679 DCI.AddToWorklist(Vec.getNode());
6680 }
6681 if (EltVT != Elt.getValueType()) {
6682 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
6683 DCI.AddToWorklist(Elt.getNode());
6684 }
6685 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
6686 DCI.AddToWorklist(Vec.getNode());
6687 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
6688 DCI.AddToWorklist(Elt.getNode());
6689 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
6690 Vec, Elt, Idx);
6691 }
6692 }
6693
6694 // Push BSWAP into a vector shuffle if at least one side then simplifies.
6695 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
6696 if (SV && Op.hasOneUse()) {
6697 SDValue Op0 = Op.getOperand(0);
6698 SDValue Op1 = Op.getOperand(1);
6699
6700 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
6701 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
6702 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
6703 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
6704 EVT VecVT = N->getValueType(0);
6705 if (VecVT != Op0.getValueType()) {
6706 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
6707 DCI.AddToWorklist(Op0.getNode());
6708 }
6709 if (VecVT != Op1.getValueType()) {
6710 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
6711 DCI.AddToWorklist(Op1.getNode());
6712 }
6713 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
6714 DCI.AddToWorklist(Op0.getNode());
6715 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
6716 DCI.AddToWorklist(Op1.getNode());
6717 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
6718 }
6719 }
6720
6721 return SDValue();
6722}
6723
6724static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
6725 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
6726 // set by the CCReg instruction using the CCValid / CCMask masks.
6727 // If the CCReg instruction is itself an ICMP testing the condition
6728 // code set by some other instruction, see whether we can directly
6729 // use that condition code.
6730
6731 // Verify that we have an ICMP against some constant.
6732 if (CCValid != SystemZ::CCMASK_ICMP)
6733 return false;
6734 auto *ICmp = CCReg.getNode();
6735 if (ICmp->getOpcode() != SystemZISD::ICMP)
6736 return false;
6737 auto *CompareLHS = ICmp->getOperand(0).getNode();
6738 auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
6739 if (!CompareRHS)
6740 return false;
6741
6742 // Optimize the case where CompareLHS is a SELECT_CCMASK.
6743 if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
6744 // Verify that we have an appropriate mask for an EQ or NE comparison.
6745 bool Invert = false;
6746 if (CCMask == SystemZ::CCMASK_CMP_NE)
6747 Invert = !Invert;
6748 else if (CCMask != SystemZ::CCMASK_CMP_EQ)
6749 return false;
6750
6751 // Verify that the ICMP compares against one of the select values.
6752 auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
6753 if (!TrueVal)
6754 return false;
6755 auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
6756 if (!FalseVal)
6757 return false;
6758 if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
6759 Invert = !Invert;
6760 else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
6761 return false;
6762
6763 // Compute the effective CC mask for the new branch or select.
6764 auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
6765 auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
6766 if (!NewCCValid || !NewCCMask)
6767 return false;
6768 CCValid = NewCCValid->getZExtValue();
6769 CCMask = NewCCMask->getZExtValue();
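// Inverting the test is done by XOR-ing the mask with the set of valid CC
// bits: every CC value that was accepted is now rejected and vice versa.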
6770 if (Invert)
6771 CCMask ^= CCValid;
6772
6773 // Return the updated CCReg link.
6774 CCReg = CompareLHS->getOperand(4);
6775 return true;
6776 }
6777
6778 // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
6779 if (CompareLHS->getOpcode() == ISD::SRA) {
6780 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
6781 if (!SRACount || SRACount->getZExtValue() != 30)
6782 return false;
6783 auto *SHL = CompareLHS->getOperand(0).getNode();
6784 if (SHL->getOpcode() != ISD::SHL)
6785 return false;
6786 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
6787 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
6788 return false;
6789 auto *IPM = SHL->getOperand(0).getNode();
6790 if (IPM->getOpcode() != SystemZISD::IPM)
6791 return false;
6792
6793 // Avoid introducing CC spills (because SRA would clobber CC).
6794 if (!CompareLHS->hasOneUse())
6795 return false;
6796 // Verify that the ICMP compares against zero.
6797 if (CompareRHS->getZExtValue() != 0)
6798 return false;
6799
6800 // Compute the effective CC mask for the new branch or select.
6801 CCMask = SystemZ::reverseCCMask(CCMask);
6802
6803 // Return the updated CCReg link.
6804 CCReg = IPM->getOperand(0);
6805 return true;
6806 }
6807
6808 return false;
6809}
6810
6811SDValue SystemZTargetLowering::combineBR_CCMASK(
6812 SDNode *N, DAGCombinerInfo &DCI) const {
6813 SelectionDAG &DAG = DCI.DAG;
6814
6815 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
6816 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
6817 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
6818 if (!CCValid || !CCMask)
6819 return SDValue();
6820
6821 int CCValidVal = CCValid->getZExtValue();
6822 int CCMaskVal = CCMask->getZExtValue();
6823 SDValue Chain = N->getOperand(0);
6824 SDValue CCReg = N->getOperand(4);
6825
6826 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
6827 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
6828 Chain,
6829 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
6830 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
6831 N->getOperand(3), CCReg);
6832 return SDValue();
6833}
6834
6835SDValue SystemZTargetLowering::combineSELECT_CCMASK(
6836 SDNode *N, DAGCombinerInfo &DCI) const {
6837 SelectionDAG &DAG = DCI.DAG;
6838
6839 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
6840 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
6841 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
6842 if (!CCValid || !CCMask)
6843 return SDValue();
6844
6845 int CCValidVal = CCValid->getZExtValue();
6846 int CCMaskVal = CCMask->getZExtValue();
6847 SDValue CCReg = N->getOperand(4);
6848
6849 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
6850 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
6851 N->getOperand(0), N->getOperand(1),
6852 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
6853 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
6854 CCReg);
6855 return SDValue();
6856}
6857
6858
6859SDValue SystemZTargetLowering::combineGET_CCMASK(
6860 SDNode *N, DAGCombinerInfo &DCI) const {
6861
6862 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
6863 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
6864 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
6865 if (!CCValid || !CCMask)
6866 return SDValue();
6867 int CCValidVal = CCValid->getZExtValue();
6868 int CCMaskVal = CCMask->getZExtValue();
6869
6870 SDValue Select = N->getOperand(0);
6871 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
6872 return SDValue();
6873
6874 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
6875 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
6876 if (!SelectCCValid || !SelectCCMask)
6877 return SDValue();
6878 int SelectCCValidVal = SelectCCValid->getZExtValue();
6879 int SelectCCMaskVal = SelectCCMask->getZExtValue();
6880
6881 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
6882 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
6883 if (!TrueVal || !FalseVal)
6884 return SDValue();
6885 if (TrueVal->getZExtValue() != 0 && FalseVal->getZExtValue() == 0)
6886 ;
6887 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() != 0)
6888 SelectCCMaskVal ^= SelectCCValidVal;
6889 else
6890 return SDValue();
6891
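// The select's CC test must be covered by what GET_CCMASK examines: its valid
// bits must be a subset of CCValidVal and the two masks must agree on those
// bits. Only then can GET_CCMASK simply forward the select's CC input.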
6892 if (SelectCCValidVal & ~CCValidVal)
6893 return SDValue();
6894 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
6895 return SDValue();
6896
6897 return Select->getOperand(4);
6898}
6899
6900SDValue SystemZTargetLowering::combineIntDIVREM(
6901 SDNode *N, DAGCombinerInfo &DCI) const {
6902 SelectionDAG &DAG = DCI.DAG;
6903 EVT VT = N->getValueType(0);
6904 // In the case where the divisor is a vector of constants a cheaper
6905 // sequence of instructions can replace the divide. BuildSDIV is called to
6906 // do this during DAG combining, but it only succeeds when it can build a
6907 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
6908 // since it is not Legal but Custom it can only happen before
6909 // legalization. Therefore we must scalarize this early, before Combine 1.
6910 // For widened vectors, this is already the result of type legalization.
6911 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
6912 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
6913 return DAG.UnrollVectorOp(N);
6914 return SDValue();
6915}
6916
6917SDValue SystemZTargetLowering::combineINTRINSIC(
6918 SDNode *N, DAGCombinerInfo &DCI) const {
6919 SelectionDAG &DAG = DCI.DAG;
6920
6921 unsigned Id = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
6922 switch (Id) {
6923 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
6924 // or larger is simply a vector load.
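// (The length operand is the highest byte index accessed, so a value of 15
// or more covers all 16 bytes of the vector register.)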
6925 case Intrinsic::s390_vll:
6926 case Intrinsic::s390_vlrl:
6927 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
6928 if (C->getZExtValue() >= 15)
6929 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
6930 N->getOperand(3), MachinePointerInfo());
6931 break;
6932 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
6933 case Intrinsic::s390_vstl:
6934 case Intrinsic::s390_vstrl:
6935 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
6936 if (C->getZExtValue() >= 15)
6937 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
6938 N->getOperand(4), MachinePointerInfo());
6939 break;
6940 }
6941
6942 return SDValue();
6943}
6944
6945SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
6946 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
6947 return N->getOperand(0);
6948 return N;
6949}
6950
6951SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
6952 DAGCombinerInfo &DCI) const {
6953 switch(N->getOpcode()) {
6954 default: break;
6955 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
6956 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
6957 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
6958 case SystemZISD::MERGE_HIGH:
6959 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
6960 case ISD::LOAD: return combineLOAD(N, DCI);
6961 case ISD::STORE: return combineSTORE(N, DCI);
6962 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
6963 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
6964 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
6965 case ISD::STRICT_FP_ROUND:
6966 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
6967 case ISD::STRICT_FP_EXTEND:
6968 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
6969 case ISD::SINT_TO_FP:
6970 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
6971 case ISD::BSWAP: return combineBSWAP(N, DCI);
6972 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
6973 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
6974 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
6975 case ISD::SDIV:
6976 case ISD::UDIV:
6977 case ISD::SREM:
6978 case ISD::UREM: return combineIntDIVREM(N, DCI);
6979 case ISD::INTRINSIC_W_CHAIN:
6980 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
6981 }
6982
6983 return SDValue();
6984}
6985
6986// Return the demanded elements for the OpNo source operand of Op. DemandedElts
6987// are for Op.
6988static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
6989 unsigned OpNo) {
6990 EVT VT = Op.getValueType();
6991 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
6992 APInt SrcDemE;
6993 unsigned Opcode = Op.getOpcode();
6994 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
6995 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
6996 switch (Id) {
6997 case Intrinsic::s390_vpksh: // PACKS
6998 case Intrinsic::s390_vpksf:
6999 case Intrinsic::s390_vpksg:
7000 case Intrinsic::s390_vpkshs: // PACKS_CC
7001 case Intrinsic::s390_vpksfs:
7002 case Intrinsic::s390_vpksgs:
7003 case Intrinsic::s390_vpklsh: // PACKLS
7004 case Intrinsic::s390_vpklsf:
7005 case Intrinsic::s390_vpklsg:
7006 case Intrinsic::s390_vpklshs: // PACKLS_CC
7007 case Intrinsic::s390_vpklsfs:
7008 case Intrinsic::s390_vpklsgs:
7009 // VECTOR PACK truncates the elements of two source vectors into one.
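// Result elements [0, NumElts/2) come from the first source operand and the
// remaining half from the second, so select the matching half of the
// demanded-elements mask for the requested operand.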
7010 SrcDemE = DemandedElts;
7011 if (OpNo == 2)
7012 SrcDemE.lshrInPlace(NumElts / 2);
7013 SrcDemE = SrcDemE.trunc(NumElts / 2);
7014 break;
7015 // VECTOR UNPACK extends half the elements of the source vector.
7016 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7017 case Intrinsic::s390_vuphh:
7018 case Intrinsic::s390_vuphf:
7019 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7020 case Intrinsic::s390_vuplhh:
7021 case Intrinsic::s390_vuplhf:
7022 SrcDemE = APInt(NumElts * 2, 0);
7023 SrcDemE.insertBits(DemandedElts, 0);
7024 break;
7025 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7026 case Intrinsic::s390_vuplhw:
7027 case Intrinsic::s390_vuplf:
7028 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7029 case Intrinsic::s390_vupllh:
7030 case Intrinsic::s390_vupllf:
7031 SrcDemE = APInt(NumElts * 2, 0);
7032 SrcDemE.insertBits(DemandedElts, NumElts);
7033 break;
7034 case Intrinsic::s390_vpdi: {
7035 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
7036 SrcDemE = APInt(NumElts, 0);
7037 if (!DemandedElts[OpNo - 1])
7038 break;
7039 unsigned Mask = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
7040 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
7041 // Demand input element 0 or 1, given by the mask bit value.
7042 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
7043 break;
7044 }
7045 case Intrinsic::s390_vsldb: {
7046 // VECTOR SHIFT LEFT DOUBLE BY BYTE
7047 assert(VT == MVT::v16i8 && "Unexpected type.");
7048 unsigned FirstIdx = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
7049 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
7050 unsigned NumSrc0Els = 16 - FirstIdx;
7051 SrcDemE = APInt(NumElts, 0);
7052 if (OpNo == 1) {
7053 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
7054 SrcDemE.insertBits(DemEls, FirstIdx);
7055 } else {
7056 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
7057 SrcDemE.insertBits(DemEls, 0);
7058 }
7059 break;
7060 }
7061 case Intrinsic::s390_vperm:
7062 SrcDemE = APInt(NumElts, 1);
7063 break;
7064 default:
7065 llvm_unreachable("Unhandled intrinsic.");
7066 break;
7067 }
7068 } else {
7069 switch (Opcode) {
7070 case SystemZISD::JOIN_DWORDS:
7071 // Scalar operand.
7072 SrcDemE = APInt(1, 1);
7073 break;
7074 case SystemZISD::SELECT_CCMASK:
7075 SrcDemE = DemandedElts;
7076 break;
7077 default:
7078 llvm_unreachable("Unhandled opcode.");
7079 break;
7080 }
7081 }
7082 return SrcDemE;
7083}
7084
7085static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
7086 const APInt &DemandedElts,
7087 const SelectionDAG &DAG, unsigned Depth,
7088 unsigned OpNo) {
7089 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
7090 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
7091 KnownBits LHSKnown =
7092 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
7093 KnownBits RHSKnown =
7094 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
7095 Known = KnownBits::commonBits(LHSKnown, RHSKnown);
7096}
7097
7098void
7099SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
7100 KnownBits &Known,
7101 const APInt &DemandedElts,
7102 const SelectionDAG &DAG,
7103 unsigned Depth) const {
7104 Known.resetAll();
7105
7106 // Intrinsic CC result is returned in the two low bits.
7107 unsigned tmp0, tmp1; // not used
7108 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
7109 Known.Zero.setBitsFrom(2);
7110 return;
7111 }
7112 EVT VT = Op.getValueType();
7113 if (Op.getResNo() != 0 || VT == MVT::Untyped)
7114 return;
7115 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
7116 "KnownBits does not match VT in bitwidth");
7117 assert ((!VT.isVector() ||
7118 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
7119 "DemandedElts does not match VT number of elements");
7120 unsigned BitWidth = Known.getBitWidth();
7121 unsigned Opcode = Op.getOpcode();
7122 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7123 bool IsLogical = false;
7124 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7125 switch (Id) {
7126 case Intrinsic::s390_vpksh: // PACKS
7127 case Intrinsic::s390_vpksf:
7128 case Intrinsic::s390_vpksg:
7129 case Intrinsic::s390_vpkshs: // PACKS_CC
7130 case Intrinsic::s390_vpksfs:
7131 case Intrinsic::s390_vpksgs:
7132 case Intrinsic::s390_vpklsh: // PACKLS
7133 case Intrinsic::s390_vpklsf:
7134 case Intrinsic::s390_vpklsg:
7135 case Intrinsic::s390_vpklshs: // PACKLS_CC
7136 case Intrinsic::s390_vpklsfs:
7137 case Intrinsic::s390_vpklsgs:
7138 case Intrinsic::s390_vpdi:
7139 case Intrinsic::s390_vsldb:
7140 case Intrinsic::s390_vperm:
7141 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
7142 break;
7143 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7144 case Intrinsic::s390_vuplhh:
7145 case Intrinsic::s390_vuplhf:
7146 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7147 case Intrinsic::s390_vupllh:
7148 case Intrinsic::s390_vupllf:
7149 IsLogical = true;
7150 LLVM_FALLTHROUGH;
7151 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7152 case Intrinsic::s390_vuphh:
7153 case Intrinsic::s390_vuphf:
7154 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7155 case Intrinsic::s390_vuplhw:
7156 case Intrinsic::s390_vuplf: {
7157 SDValue SrcOp = Op.getOperand(1);
7158 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
7159 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
7160 if (IsLogical) {
7161 Known = Known.zext(BitWidth);
7162 } else
7163 Known = Known.sext(BitWidth);
7164 break;
7165 }
7166 default:
7167 break;
7168 }
7169 } else {
7170 switch (Opcode) {
7171 case SystemZISD::JOIN_DWORDS:
7172 case SystemZISD::SELECT_CCMASK:
7173 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
7174 break;
7175 case SystemZISD::REPLICATE: {
7176 SDValue SrcOp = Op.getOperand(0);
7177 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
7178 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
7179 Known = Known.sext(BitWidth); // VREPI sign-extends the immediate.
7180 break;
7181 }
7182 default:
7183 break;
7184 }
7185 }
7186
7187 // Known has the width of the source operand(s). Adjust if needed to match
7188 // the passed bitwidth.
7189 if (Known.getBitWidth() != BitWidth)
7190 Known = Known.anyextOrTrunc(BitWidth);
7191}
7192
7193static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
7194 const SelectionDAG &DAG, unsigned Depth,
7195 unsigned OpNo) {
7196 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
7197 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
7198 if (LHS == 1) return 1; // Early out.
7199 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
7200 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
7201 if (RHS == 1) return 1; // Early out.
7202 unsigned Common = std::min(LHS, RHS);
7203 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
7204 EVT VT = Op.getValueType();
7205 unsigned VTBits = VT.getScalarSizeInBits();
7206 if (SrcBitWidth > VTBits) { // PACK
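// Packing truncates each element from SrcBitWidth to VTBits, discarding the
// top SrcExtraBits bits; only sign bits beyond those survive in the result.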
7207 unsigned SrcExtraBits = SrcBitWidth - VTBits;
7208 if (Common > SrcExtraBits)
7209 return (Common - SrcExtraBits);
7210 return 1;
7211 }
7212 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
7213 return Common;
7214}
7215
7216unsigned
7217SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
7218 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
7219 unsigned Depth) const {
7220 if (Op.getResNo() != 0)
7221 return 1;
7222 unsigned Opcode = Op.getOpcode();
7223 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7224 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7225 switch (Id) {
7226 case Intrinsic::s390_vpksh: // PACKS
7227 case Intrinsic::s390_vpksf:
7228 case Intrinsic::s390_vpksg:
7229 case Intrinsic::s390_vpkshs: // PACKS_CC
7230 case Intrinsic::s390_vpksfs:
7231 case Intrinsic::s390_vpksgs:
7232 case Intrinsic::s390_vpklsh: // PACKLS
7233 case Intrinsic::s390_vpklsf:
7234 case Intrinsic::s390_vpklsg:
7235 case Intrinsic::s390_vpklshs: // PACKLS_CC
7236 case Intrinsic::s390_vpklsfs:
7237 case Intrinsic::s390_vpklsgs:
7238 case Intrinsic::s390_vpdi:
7239 case Intrinsic::s390_vsldb:
7240 case Intrinsic::s390_vperm:
7241 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
7242 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7243 case Intrinsic::s390_vuphh:
7244 case Intrinsic::s390_vuphf:
7245 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7246 case Intrinsic::s390_vuplhw:
7247 case Intrinsic::s390_vuplf: {
7248 SDValue PackedOp = Op.getOperand(1);
7249 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
7250 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
7251 EVT VT = Op.getValueType();
7252 unsigned VTBits = VT.getScalarSizeInBits();
7253 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
7254 return Tmp;
7255 }
7256 default:
7257 break;
7258 }
7259 } else {
7260 switch (Opcode) {
7261 case SystemZISD::SELECT_CCMASK:
7262 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
7263 default:
7264 break;
7265 }
7266 }
7267
7268 return 1;
7269}
7270
7271unsigned
7272SystemZTargetLowering::getStackProbeSize(MachineFunction &MF) const {
7273 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
7274 unsigned StackAlign = TFI->getStackAlignment();
7275 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
7276 "Unexpected stack alignment");
7277 // The default stack probe size is 4096 if the function has no
7278 // stack-probe-size attribute.
7279 unsigned StackProbeSize = 4096;
7280 const Function &Fn = MF.getFunction();
7281 if (Fn.hasFnAttribute("stack-probe-size"))
7282 Fn.getFnAttribute("stack-probe-size")
7283 .getValueAsString()
7284 .getAsInteger(0, StackProbeSize);
7285 // Round down to the stack alignment.
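// StackAlign is a power of two, so masking with ~(StackAlign - 1) rounds the
// requested probe size down to a multiple of the alignment; if that leaves
// zero, fall back to probing once every StackAlign bytes.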
7286 StackProbeSize &= ~(StackAlign - 1);
7287 return StackProbeSize ? StackProbeSize : StackAlign;
7288}
7289
7290//===----------------------------------------------------------------------===//
7291// Custom insertion
7292//===----------------------------------------------------------------------===//
7293
7294// Force base value Base into a register before MI. Return the register.
7295static Register forceReg(MachineInstr &MI, MachineOperand &Base,
7296 const SystemZInstrInfo *TII) {
7297 MachineBasicBlock *MBB = MI.getParent();
7298 MachineFunction &MF = *MBB->getParent();
7299 MachineRegisterInfo &MRI = MF.getRegInfo();
7300
7301 if (Base.isReg()) {
7302 // Copy Base into a new virtual register to help register coalescing in
7303 // cases with multiple uses.
7304 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
7305 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
7306 .add(Base);
7307 return Reg;
7308 }
7309
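// Otherwise Base is a frame index; use LA to materialize its address.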
7310 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
7311 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
7312 .add(Base)
7313 .addImm(0)
7314 .addReg(0);
7315 return Reg;
7316}
7317
7318// The CC operand of MI might be missing a kill marker because there
7319// were multiple uses of CC, and ISel didn't know which to mark.
7320// Figure out whether MI should have had a kill marker.
7321static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
7322 // Scan forward through BB for a use/def of CC.
7323 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
7324 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
7325 const MachineInstr& mi = *miI;
7326 if (mi.readsRegister(SystemZ::CC))
7327 return false;
7328 if (mi.definesRegister(SystemZ::CC))
7329 break; // Should have kill-flag - update below.
7330 }
7331
7332 // If we hit the end of the block, check whether CC is live into a
7333 // successor.
7334 if (miI == MBB->end()) {
7335 for (const MachineBasicBlock *Succ : MBB->successors())
7336 if (Succ->isLiveIn(SystemZ::CC))
7337 return false;
7338 }
7339
7340 return true;
7341}
7342
7343// Return true if it is OK for this Select pseudo-opcode to be cascaded
7344// together with other Select pseudo-opcodes into a single basic-block with
7345// a conditional jump around it.
7346static bool isSelectPseudo(MachineInstr &MI) {
7347 switch (MI.getOpcode()) {
7348 case SystemZ::Select32:
7349 case SystemZ::Select64:
7350 case SystemZ::SelectF32:
7351 case SystemZ::SelectF64:
7352 case SystemZ::SelectF128:
7353 case SystemZ::SelectVR32:
7354 case SystemZ::SelectVR64:
7355 case SystemZ::SelectVR128:
7356 return true;
7357
7358 default:
7359 return false;
7360 }
7361}
7362
7363// Helper function, which inserts PHI functions into SinkMBB:
7364// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
7365// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
7366static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
7367 MachineBasicBlock *TrueMBB,
7368 MachineBasicBlock *FalseMBB,
7369 MachineBasicBlock *SinkMBB) {
7370 MachineFunction *MF = TrueMBB->getParent();
7371 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
7372
7373 MachineInstr *FirstMI = Selects.front();
7374 unsigned CCValid = FirstMI->getOperand(3).getImm();
7375 unsigned CCMask = FirstMI->getOperand(4).getImm();
7376
7377 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
7378
7379 // As we are creating the PHIs, we have to be careful if there is more than
7380 // one. Later Selects may reference the results of earlier Selects, but later
7381 // PHIs have to reference the individual true/false inputs from earlier PHIs.
7382 // That also means that PHI construction must work forward from earlier to
7383 // later, and that the code must maintain a mapping from each earlier PHI's
7384 // destination register to the registers that went into that PHI.
7385 DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
7386
7387 for (auto MI : Selects) {
7388 Register DestReg = MI->getOperand(0).getReg();
7389 Register TrueReg = MI->getOperand(1).getReg();
7390 Register FalseReg = MI->getOperand(2).getReg();
7391
7392 // If this Select we are generating is the opposite condition from
7393 // the jump we generated, then we have to swap the operands for the
7394 // PHI that is going to be generated.
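// (A Select whose mask equals CCValid ^ CCMask tests exactly the
// complementary condition of the first Select in the group.)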
7395 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
7396 std::swap(TrueReg, FalseReg);
7397
7398 if (RegRewriteTable.find(TrueReg) != RegRewriteTable.end())
7399 TrueReg = RegRewriteTable[TrueReg].first;
7400
7401 if (RegRewriteTable.find(FalseReg) != RegRewriteTable.end())
7402 FalseReg = RegRewriteTable[FalseReg].second;
7403
7404 DebugLoc DL = MI->getDebugLoc();
7405 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
7406 .addReg(TrueReg).addMBB(TrueMBB)
7407 .addReg(FalseReg).addMBB(FalseMBB);
7408
7409 // Add this PHI to the rewrite table.
7410 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
7411 }
7412
7413 MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
7414}
7415
7416// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
7417MachineBasicBlock *
7418SystemZTargetLowering::emitSelect(MachineInstr &MI,
7419 MachineBasicBlock *MBB) const {
7420 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
7421 const SystemZInstrInfo *TII =
7422 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
7423
7424 unsigned CCValid = MI.getOperand(3).getImm();
7425 unsigned CCMask = MI.getOperand(4).getImm();
7426
7427 // If we have a sequence of Select* pseudo instructions using the
7428 // same condition code value, we want to expand all of them into
7429 // a single pair of basic blocks using the same condition.
7430 SmallVector<MachineInstr*, 8> Selects;
7431 SmallVector<MachineInstr*, 8> DbgValues;
7432 Selects.push_back(&MI);
7433 unsigned Count = 0;
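// Scan forward for further Selects on the same condition. Unrelated
// instructions may be skipped as long as they neither redefine CC, need
// custom insertion, nor read a Select result; debug values that do read one
// are collected so they can be re-inserted after the join block. Give up
// after 20 skipped instructions to keep the search bounded.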
7434 for (MachineBasicBlock::iterator NextMIIt =
7435 std::next(MachineBasicBlock::iterator(MI));
7436 NextMIIt != MBB->end(); ++NextMIIt) {
7437 if (isSelectPseudo(*NextMIIt)) {
7438 assert(NextMIIt->getOperand(3).getImm() == CCValid &&
7439 "Bad CCValid operands since CC was not redefined.");
7440 if (NextMIIt->getOperand(4).getImm() == CCMask ||
7441 NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask)) {
7442 Selects.push_back(&*NextMIIt);
7443 continue;
7444 }
7445 break;
7446 }
7447 if (NextMIIt->definesRegister(SystemZ::CC) ||
7448 NextMIIt->usesCustomInsertionHook())
7449 break;
7450 bool User = false;
7451 for (auto SelMI : Selects)
7452 if (NextMIIt->readsVirtualRegister(SelMI->getOperand(0).getReg())) {
7453 User = true;
7454 break;
7455 }
7456 if (NextMIIt->isDebugInstr()) {
7457 if (User) {
7458 assert(NextMIIt->isDebugValue() && "Unhandled debug opcode.");
7459 DbgValues.push_back(&*NextMIIt);
7460 }
7461 }
7462 else if (User || ++Count > 20)
7463 break;
7464 }
7465
7466 MachineInstr *LastMI = Selects.back();
7467 bool CCKilled =
7468 (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB));
7469 MachineBasicBlock *StartMBB = MBB;
7470 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
7471 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
7472
7473 // Unless CC was killed in the last Select instruction, mark it as
7474 // live-in to both FalseMBB and JoinMBB.
7475 if (!CCKilled) {
7476 FalseMBB->addLiveIn(SystemZ::CC);
7477 JoinMBB->addLiveIn(SystemZ::CC);
7478 }
7479
7480 // StartMBB:
7481 // BRC CCMask, JoinMBB
7482 // # fallthrough to FalseMBB
7483 MBB = StartMBB;
7484 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
7485 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
7486 MBB->addSuccessor(JoinMBB);
7487 MBB->addSuccessor(FalseMBB);
7488
7489 // FalseMBB:
7490 // # fallthrough to JoinMBB
7491 MBB = FalseMBB;
7492 MBB->addSuccessor(JoinMBB);
7493
7494 // JoinMBB:
7495 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
7496 // ...
7497 MBB = JoinMBB;
7498 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
7499 for (auto SelMI : Selects)
7500 SelMI->eraseFromParent();
7501
7502 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
7503 for (auto DbgMI : DbgValues)
7504 MBB->splice(InsertPos, StartMBB, DbgMI);
7505
7506 return JoinMBB;
7507}
7508
7509// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
7510// StoreOpcode is the store to use and Invert says whether the store should
7511// happen when the condition is false rather than true. If a STORE ON
7512// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
7513MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
7514 MachineBasicBlock *MBB,
7515 unsigned StoreOpcode,
7516 unsigned STOCOpcode,
7517 bool Invert) const {
7518 const SystemZInstrInfo *TII =
7519 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
7520
7521 Register SrcReg = MI.getOperand(0).getReg();
7522 MachineOperand Base = MI.getOperand(1);
7523 int64_t Disp = MI.getOperand(2).getImm();
7524 Register IndexReg = MI.getOperand(3).getReg();
7525 unsigned CCValid = MI.getOperand(4).getImm();
7526 unsigned CCMask = MI.getOperand(5).getImm();
7527 DebugLoc DL = MI.getDebugLoc();
7528
7529 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
7530
7531 // ISel pattern matching also adds a load memory operand of the same
7532 // address, so take special care to find the storing memory operand.
7533 MachineMemOperand *MMO = nullptr;
7534 for (auto *I : MI.memoperands())
7535 if (I->isStore()) {
7536 MMO = I;
7537 break;
7538 }
7539
7540 // Use STOCOpcode if possible. We could use different store patterns in
7541 // order to avoid matching the index register, but the performance trade-offs
7542 // might be more complicated in that case.
7543 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
7544 if (Invert)
7545 CCMask ^= CCValid;
7546
7547 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
7548 .addReg(SrcReg)
7549 .add(Base)
7550 .addImm(Disp)
7551 .addImm(CCValid)
7552 .addImm(CCMask)
7553 .addMemOperand(MMO);
7554
7555 MI.eraseFromParent();
7556 return MBB;
7557 }
7558
7559 // Get the condition needed to branch around the store.
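// The branch skips the store, so it must be taken on the opposite of the
// store condition. XOR-ing with CCValid gives the complementary mask; when
// Invert is set the incoming mask already is the branch condition.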
7560 if (!Invert)
7561 CCMask ^= CCValid;
7562
7563 MachineBasicBlock *StartMBB = MBB;
7564 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
7565 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
7566
7567 // Unless CC was killed in the CondStore instruction, mark it as
7568 // live-in to both FalseMBB and JoinMBB.
7569 if (!MI.killsRegister(SystemZ::CC) && !checkCCKill(MI, JoinMBB)) {
7570 FalseMBB->addLiveIn(SystemZ::CC);
7571 JoinMBB->addLiveIn(SystemZ::CC);
7572 }
7573
7574 // StartMBB:
7575 // BRC CCMask, JoinMBB
7576 // # fallthrough to FalseMBB
7577 MBB = StartMBB;
7578 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7579 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
7580 MBB->addSuccessor(JoinMBB);
7581 MBB->addSuccessor(FalseMBB);
7582
7583 // FalseMBB:
7584 // store %SrcReg, %Disp(%Index,%Base)
7585 // # fallthrough to JoinMBB
7586 MBB = FalseMBB;
7587 BuildMI(MBB, DL, TII->get(StoreOpcode))
7588 .addReg(SrcReg)
7589 .add(Base)
7590 .addImm(Disp)
7591 .addReg(IndexReg)
7592 .addMemOperand(MMO);
7593 MBB->addSuccessor(JoinMBB);
7594
7595 MI.eraseFromParent();
7596 return JoinMBB;
7597}
7598
7599// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
7600// or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that
7601// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
7602// BitSize is the width of the field in bits, or 0 if this is a partword
7603// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
7604// is one of the operands. Invert says whether the field should be
7605// inverted after performing BinOpcode (e.g. for NAND).
7606MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
7607 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
7608 unsigned BitSize, bool Invert) const {
7609 MachineFunction &MF = *MBB->getParent();
7610 const SystemZInstrInfo *TII =
7611 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
7612 MachineRegisterInfo &MRI = MF.getRegInfo();
7613 bool IsSubWord = (BitSize < 32);
7614
7615 // Extract the operands. Base can be a register or a frame index.
7616 // Src2 can be a register or immediate.
7617 Register Dest = MI.getOperand(0).getReg();
7618 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
7619 int64_t Disp = MI.getOperand(2).getImm();
7620 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
7621 Register BitShift = IsSubWord ? MI.getOperand(4).getReg() : Register();
7622 Register NegBitShift = IsSubWord ? MI.getOperand(5).getReg() : Register();
7623 DebugLoc DL = MI.getDebugLoc();
7624 if (IsSubWord)
7625 BitSize = MI.getOperand(6).getImm();
7626
7627 // Subword operations use 32-bit registers.
7628 const TargetRegisterClass *RC = (BitSize <= 32 ?
7629 &SystemZ::GR32BitRegClass :
7630 &SystemZ::GR64BitRegClass);
7631 unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
7632 unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
7633
7634 // Get the right opcodes for the displacement.
7635 LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
7636 CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
7637 assert(LOpcode && CSOpcode && "Displacement out of range");
7638
7639 // Create virtual registers for temporary results.
7640 Register OrigVal = MRI.createVirtualRegister(RC);
7641 Register OldVal = MRI.createVirtualRegister(RC);
7642 Register NewVal = (BinOpcode || IsSubWord ?
7643 MRI.createVirtualRegister(RC) : Src2.getReg());
7644 Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
7645 Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
7646
7647 // Insert a basic block for the main loop.
7648 MachineBasicBlock *StartMBB = MBB;
7649 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
7650 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
7651
7652 // StartMBB:
7653 // ...
7654 // %OrigVal = L Disp(%Base)
7655 // # fall through to LoopMBB
7656 MBB = StartMBB;
7657 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
7658 MBB->addSuccessor(LoopMBB);
7659
7660 // LoopMBB:
7661 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
7662 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
7663 // %RotatedNewVal = OP %RotatedOldVal, %Src2
7664 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
7665 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
7666 // JNE LoopMBB
7667 // # fall through to DoneMBB
7668 MBB = LoopMBB;
7669 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
7670 .addReg(OrigVal).addMBB(StartMBB)
7671 .addReg(Dest).addMBB(LoopMBB);
7672 if (IsSubWord)
7673 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
7674 .addReg(OldVal).addReg(BitShift).addImm(0);
7675 if (Invert) {
7676 // Perform the operation normally and then invert every bit of the field.
7677 Register Tmp = MRI.createVirtualRegister(RC);
7678 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
7679 if (BitSize <= 32)
7680 // XILF with the upper BitSize bits set.
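// After the RLL, the field sits in the most significant BitSize bits of the
// 32-bit value, so this immediate inverts exactly the bits of the field.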
7681 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
7682 .addReg(Tmp).addImm(-1U << (32 - BitSize));
7683 else {
7684 // Use LCGR and add -1 to the result, which is more compact than
7685 // an XILF, XILH pair.
7686 Register Tmp2 = MRI.createVirtualRegister(RC);
7687 BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
7688 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
7689 .addReg(Tmp2).addImm(-1);
7690 }
7691 } else if (BinOpcode)
7692 // A simple binary operation.
7693 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
7694 .addReg(RotatedOldVal)
7695 .add(Src2);
7696 else if (IsSubWord)
7697 // Use RISBG to rotate Src2 into position and use it to replace the
7698 // field in RotatedOldVal.
7699 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
7700 .addReg(RotatedOldVal).addReg(Src2.getReg())
7701 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
7702 if (IsSubWord)
7703 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
7704 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
7705 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
7706 .addReg(OldVal)
7707 .addReg(NewVal)
7708 .add(Base)
7709 .addImm(Disp);
7710 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7711 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
7712 MBB->addSuccessor(LoopMBB);
7713 MBB->addSuccessor(DoneMBB);
7714
7715 MI.eraseFromParent();
7716 return DoneMBB;
7717}
7718
7719// Implement EmitInstrWithCustomInserter for pseudo
7720// ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
7721// instruction that should be used to compare the current field with the
7722// minimum or maximum value. KeepOldMask is the BRC condition-code mask
7723// for when the current field should be kept. BitSize is the width of
7724// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
7725MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
7726 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
7727 unsigned KeepOldMask, unsigned BitSize) const {
7728 MachineFunction &MF = *MBB->getParent();
7729 const SystemZInstrInfo *TII =
7730 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
7731 MachineRegisterInfo &MRI = MF.getRegInfo();
7732 bool IsSubWord = (BitSize < 32);
7733
7734 // Extract the operands. Base can be a register or a frame index.
7735 Register Dest = MI.getOperand(0).getReg();
7736 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
7737 int64_t Disp = MI.getOperand(2).getImm();
7738 Register Src2 = MI.getOperand(3).getReg();
7739 Register BitShift = (IsSubWord ? MI.getOperand(4).getReg() : Register());
7740 Register NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : Register());
7741 DebugLoc DL = MI.getDebugLoc();
7742 if (IsSubWord)
7743 BitSize = MI.getOperand(6).getImm();
7744
7745 // Subword operations use 32-bit registers.
7746 const TargetRegisterClass *RC = (BitSize <= 32 ?
7747 &SystemZ::GR32BitRegClass :
7748 &SystemZ::GR64BitRegClass);
7749 unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
7750 unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
7751
7752 // Get the right opcodes for the displacement.
7753 LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
7754 CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
7755 assert(LOpcode && CSOpcode && "Displacement out of range");
7756
7757 // Create virtual registers for temporary results.
7758 Register OrigVal = MRI.createVirtualRegister(RC);
7759 Register OldVal = MRI.createVirtualRegister(RC);
7760 Register NewVal = MRI.createVirtualRegister(RC);
7761 Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
7762 Register RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
7763 Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
7764
7765 // Insert 3 basic blocks for the loop.
7766 MachineBasicBlock *StartMBB = MBB;
7767 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
7768 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
7769 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
7770 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
7771
7772 // StartMBB:
7773 // ...
7774 // %OrigVal = L Disp(%Base)
7775 // # fall through to LoopMBB
7776 MBB = StartMBB;
7777 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
7778 MBB->addSuccessor(LoopMBB);
7779
7780 // LoopMBB:
7781 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
7782 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
7783 // CompareOpcode %RotatedOldVal, %Src2
7784 // BRC KeepOldMask, UpdateMBB
7785 MBB = LoopMBB;
7786 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
7787 .addReg(OrigVal).addMBB(StartMBB)
7788 .addReg(Dest).addMBB(UpdateMBB);
7789 if (IsSubWord)
7790 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
7791 .addReg(OldVal).addReg(BitShift).addImm(0);
7792 BuildMI(MBB, DL, TII->get(CompareOpcode))
7793 .addReg(RotatedOldVal).addReg(Src2);
7794 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7795 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
7796 MBB->addSuccessor(UpdateMBB);
7797 MBB->addSuccessor(UseAltMBB);
7798
7799 // UseAltMBB:
7800 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
7801 // # fall through to UpdateMBB
7802 MBB = UseAltMBB;
7803 if (IsSubWord)
7804 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
7805 .addReg(RotatedOldVal).addReg(Src2)
7806 .addImm(32).addImm(31 + BitSize).addImm(0);
7807 MBB->addSuccessor(UpdateMBB);
7808
7809 // UpdateMBB:
7810 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
7811 // [ %RotatedAltVal, UseAltMBB ]
7812 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
7813 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
7814 // JNE LoopMBB
7815 // # fall through to DoneMBB
7816 MBB = UpdateMBB;
7817 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
7818 .addReg(RotatedOldVal).addMBB(LoopMBB)
7819 .addReg(RotatedAltVal).addMBB(UseAltMBB);
7820 if (IsSubWord)
7821 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
7822 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
7823 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
7824 .addReg(OldVal)
7825 .addReg(NewVal)
7826 .add(Base)
7827 .addImm(Disp);
7828 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7829 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
7830 MBB->addSuccessor(LoopMBB);
7831 MBB->addSuccessor(DoneMBB);
7832
7833 MI.eraseFromParent();
7834 return DoneMBB;
7835}
7836
7837// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
7838// instruction MI.
7839MachineBasicBlock *
7840SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
7841 MachineBasicBlock *MBB) const {
7842 MachineFunction &MF = *MBB->getParent();
7843 const SystemZInstrInfo *TII =
7844 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
7845 MachineRegisterInfo &MRI = MF.getRegInfo();
7846
7847 // Extract the operands. Base can be a register or a frame index.
7848 Register Dest = MI.getOperand(0).getReg();
7849 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
7850 int64_t Disp = MI.getOperand(2).getImm();
7851 Register CmpVal = MI.getOperand(3).getReg();
7852 Register OrigSwapVal = MI.getOperand(4).getReg();
7853 Register BitShift = MI.getOperand(5).getReg();
7854 Register NegBitShift = MI.getOperand(6).getReg();
7855 int64_t BitSize = MI.getOperand(7).getImm();
7856 DebugLoc DL = MI.getDebugLoc();
7857
7858 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
7859
7860 // Get the right opcodes for the displacement and zero-extension.
7861 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
7862 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
7863 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
7864 assert(LOpcode && CSOpcode && "Displacement out of range");
7865
7866 // Create virtual registers for temporary results.
7867 Register OrigOldVal = MRI.createVirtualRegister(RC);
7868 Register OldVal = MRI.createVirtualRegister(RC);
7869 Register SwapVal = MRI.createVirtualRegister(RC);
7870 Register StoreVal = MRI.createVirtualRegister(RC);
7871 Register OldValRot = MRI.createVirtualRegister(RC);
7872 Register RetryOldVal = MRI.createVirtualRegister(RC);
7873 Register RetrySwapVal = MRI.createVirtualRegister(RC);
7874
7875 // Insert 2 basic blocks for the loop.
7876 MachineBasicBlock *StartMBB = MBB;
7877 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
7878 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
7879 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
7880
7881 // StartMBB:
7882 // ...
7883 // %OrigOldVal = L Disp(%Base)
7884 // # fall through to LoopMBB
7885 MBB = StartMBB;
7886 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
7887 .add(Base)
7888 .addImm(Disp)
7889 .addReg(0);
7890 MBB->addSuccessor(LoopMBB);
7891
7892 // LoopMBB:
7893 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
7894 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
7895 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
7896 // ^^ The low BitSize bits contain the field
7897 // of interest.
7898 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
7899 // ^^ Replace the upper 32-BitSize bits of the
7900 // swap value with those that we loaded and rotated.
7901 // %Dest = LL[CH] %OldValRot
7902 // CR %Dest, %CmpVal
7903 // JNE DoneMBB
7904 // # Fall through to SetMBB
7905 MBB = LoopMBB;
7906 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
7907 .addReg(OrigOldVal).addMBB(StartMBB)
7908 .addReg(RetryOldVal).addMBB(SetMBB);
7909 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
7910 .addReg(OrigSwapVal).addMBB(StartMBB)
7911 .addReg(RetrySwapVal).addMBB(SetMBB);
7912 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
7913 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
7914 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
7915 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
7916 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
7917 .addReg(OldValRot);
7918 BuildMI(MBB, DL, TII->get(SystemZ::CR))
7919 .addReg(Dest).addReg(CmpVal);
7920 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7921 .addImm(SystemZ::CCMASK_ICMP)
7922 .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
7923 MBB->addSuccessor(DoneMBB);
7924 MBB->addSuccessor(SetMBB);
7925
7926 // SetMBB:
7927 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
7928 // ^^ Rotate the new field to its proper position.
7929 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
7930 // JNE LoopMBB
7931 // # fall through to ExitMBB
7932 MBB = SetMBB;
7933 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
7934 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
7935 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
7936 .addReg(OldVal)
7937 .addReg(StoreVal)
7938 .add(Base)
7939 .addImm(Disp);
7940 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7941 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
7942 MBB->addSuccessor(LoopMBB);
7943 MBB->addSuccessor(DoneMBB);
7944
7945 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
7946 // to the block after the loop. At this point, CC may have been defined
7947 // either by the CR in LoopMBB or by the CS in SetMBB.
7948 if (!MI.registerDefIsDead(SystemZ::CC))
7949 DoneMBB->addLiveIn(SystemZ::CC);
7950
7951 MI.eraseFromParent();
7952 return DoneMBB;
7953}
7954
7955// Emit a move from two GR64s to a GR128.
7956MachineBasicBlock *
7957SystemZTargetLowering::emitPair128(MachineInstr &MI,
7958 MachineBasicBlock *MBB) const {
7959 MachineFunction &MF = *MBB->getParent();
7960 const SystemZInstrInfo *TII =
7961 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
7962 MachineRegisterInfo &MRI = MF.getRegInfo();
7963 DebugLoc DL = MI.getDebugLoc();
7964
7965 Register Dest = MI.getOperand(0).getReg();
7966 Register Hi = MI.getOperand(1).getReg();
7967 Register Lo = MI.getOperand(2).getReg();
7968 Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
7969 Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
7970
7971 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1);
7972 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
7973 .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64);
7974 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
7975 .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64);
7976
7977 MI.eraseFromParent();
7978 return MBB;
7979}
7980
7981// Emit an extension from a GR64 to a GR128. ClearEven is true
7982// if the high register of the GR128 value must be cleared or false if
7983// it's "don't care".
7984MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
7985 MachineBasicBlock *MBB,
7986 bool ClearEven) const {
7987 MachineFunction &MF = *MBB->getParent();
7988 const SystemZInstrInfo *TII =
7989 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
7990 MachineRegisterInfo &MRI = MF.getRegInfo();
7991 DebugLoc DL = MI.getDebugLoc();
7992
7993 Register Dest = MI.getOperand(0).getReg();
7994 Register Src = MI.getOperand(1).getReg();
7995 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
7996
7997 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
7998 if (ClearEven) {
7999 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8000 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8001
8002 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
8003 .addImm(0);
8004 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
8005 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
8006 In128 = NewIn128;
8007 }
8008 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8009 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
8010
8011 MI.eraseFromParent();
8012 return MBB;
8013}
8014
8015MachineBasicBlock *
8016SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
8017 MachineBasicBlock *MBB,
8018 unsigned Opcode, bool IsMemset) const {
8019 MachineFunction &MF = *MBB->getParent();
8020 const SystemZInstrInfo *TII =
8021 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
8022 MachineRegisterInfo &MRI = MF.getRegInfo();
8023 DebugLoc DL = MI.getDebugLoc();
8024
8025 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
8026 uint64_t DestDisp = MI.getOperand(1).getImm();
8027 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
8028 uint64_t SrcDisp;
8029
8030 // Fold the displacement Disp if it is out of range.
8031 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
8032 if (!isUInt<12>(Disp)) {
8033 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8034 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
8035 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
8036 .add(Base).addImm(Disp).addReg(0);
8037 Base = MachineOperand::CreateReg(Reg, false);
8038 Disp = 0;
8039 }
8040 };
8041
8042 if (!IsMemset) {
8043 SrcBase = earlyUseOperand(MI.getOperand(2));
8044 SrcDisp = MI.getOperand(3).getImm();
8045 } else {
8046 SrcBase = DestBase;
8047 SrcDisp = DestDisp++;
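// For memset, the first byte is stored explicitly (MVI/STC below) and the
// MVC source overlaps the destination shifted by one byte: MVC copies left
// to right, so the stored byte propagates through the whole range.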
8048 foldDisplIfNeeded(DestBase, DestDisp);
8049 }
8050
8051 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
8052 bool IsImmForm = LengthMO.isImm();
8053 bool IsRegForm = !IsImmForm;
8054
8055 // Build and insert one Opcode of Length, with special treatment for memset.
8056 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
8057 MachineBasicBlock::iterator InsPos,
8058 MachineOperand DBase, uint64_t DDisp,
8059 MachineOperand SBase, uint64_t SDisp,
8060 unsigned Length) -> void {
8061 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
8062 if (IsMemset) {
8063 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
8064 if (ByteMO.isImm())
8065 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
8066 .add(SBase).addImm(SDisp).add(ByteMO);
8067 else
8068 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
8069 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
8070 if (--Length == 0)
8071 return;
8072 }
8073 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
8074 .add(DBase).addImm(DDisp).addImm(Length)
8075 .add(SBase).addImm(SDisp)
8076 .setMemRefs(MI.memoperands());
8077 };
8078
8079 bool NeedsLoop = false;
8080 uint64_t ImmLength = 0;
8081 Register LenAdjReg = SystemZ::NoRegister;
8082 if (IsImmForm) {
8083 ImmLength = LengthMO.getImm();
8084 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
8085 if (ImmLength == 0) {
8086 MI.eraseFromParent();
8087 return MBB;
8088 }
8089 if (Opcode == SystemZ::CLC) {
8090 if (ImmLength > 3 * 256)
8091 // A two-CLC sequence is a clear win over a loop, not least because
8092 // it needs only one branch. A three-CLC sequence needs the same
8093 // number of branches as a loop (i.e. 2), but is shorter. That
8094 // brings us to lengths greater than 768 bytes. It seems relatively
8095 // likely that a difference will be found within the first 768 bytes,
8096 // so we just optimize for the smallest number of branch
8097 // instructions, in order to avoid polluting the prediction buffer
8098 // too much.
8099 NeedsLoop = true;
8100 } else if (ImmLength > 6 * 256)
8101 // The heuristic we use is to prefer loops for anything that would
8102 // require 7 or more MVCs. With these kinds of sizes there isn't much
8103 // to choose between straight-line code and looping code, since the
8104 // time will be dominated by the MVCs themselves.
8105 NeedsLoop = true;
8106 } else {
8107 NeedsLoop = true;
8108 LenAdjReg = LengthMO.getReg();
8109 }
8110
8111 // When generating more than one CLC, all but the last will need to
8112 // branch to the end when a difference is found.
8113 MachineBasicBlock *EndMBB =
8114 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
8115 ? SystemZ::splitBlockAfter(MI, MBB)
8116 : nullptr);
8117
8118 if (NeedsLoop) {
8119 Register StartCountReg =
8120 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8121 if (IsImmForm) {
8122 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
8123 ImmLength &= 255;
8124 } else {
8125 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
8126 .addReg(LenAdjReg)
8127 .addReg(0)
8128 .addImm(8);
8129 }
8130
8131 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
8132 auto loadZeroAddress = [&]() -> MachineOperand {
8133 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8134 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
8135 return MachineOperand::CreateReg(Reg, false);
8136 };
8137 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
8138 DestBase = loadZeroAddress();
8139 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
8140 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
8141
8142 MachineBasicBlock *StartMBB = nullptr;
8143 MachineBasicBlock *LoopMBB = nullptr;
8144 MachineBasicBlock *NextMBB = nullptr;
8145 MachineBasicBlock *DoneMBB = nullptr;
8146 MachineBasicBlock *AllDoneMBB = nullptr;
8147
8148 Register StartSrcReg = forceReg(MI, SrcBase, TII);
8149 Register StartDestReg =
8150 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
8151
8152 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
8153 Register ThisSrcReg = MRI.createVirtualRegister(RC);
8154 Register ThisDestReg =
8155 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
8156 Register NextSrcReg = MRI.createVirtualRegister(RC);
8157 Register NextDestReg =
8158 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
8159 RC = &SystemZ::GR64BitRegClass;
8160 Register ThisCountReg = MRI.createVirtualRegister(RC);
8161 Register NextCountReg = MRI.createVirtualRegister(RC);
8162
8163 if (IsRegForm) {
8164 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8165 StartMBB = SystemZ::emitBlockAfter(MBB);
8166 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8167 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
8168 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
8169
8170 // MBB:
8171 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
8172 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8173 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
8174 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8175 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
8176 .addMBB(AllDoneMBB);
8177 MBB->addSuccessor(AllDoneMBB);
8178 if (!IsMemset)
8179 MBB->addSuccessor(StartMBB);
8180 else {
8181 // MemsetOneCheckMBB:
8182 // # Jump to MemsetOneMBB for a memset of length 1, or
8183 // # fall thru to StartMBB.
8184 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
8185 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
8186 MBB->addSuccessor(MemsetOneCheckMBB);
8187 MBB = MemsetOneCheckMBB;
8188 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8189 .addReg(LenAdjReg).addImm(-1);
8190 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8191 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
8192 .addMBB(MemsetOneMBB);
8193 MBB->addSuccessor(MemsetOneMBB, {10, 100});
8194 MBB->addSuccessor(StartMBB, {90, 100});
8195
8196 // MemsetOneMBB:
8197 // # Jump back to AllDoneMBB after a single MVI or STC.
8198 MBB = MemsetOneMBB;
8199 insertMemMemOp(MBB, MBB->end(),
8200 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
8201 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
8202 1);
8203 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
8204 MBB->addSuccessor(AllDoneMBB);
8205 }
8206
8207 // StartMBB:
8208 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
8209 MBB = StartMBB;
8210 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8211 .addReg(StartCountReg).addImm(0);
8212 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8213 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
8214 .addMBB(DoneMBB);
8215 MBB->addSuccessor(DoneMBB);
8216 MBB->addSuccessor(LoopMBB);
8217 }
8218 else {
8219 StartMBB = MBB;
8220 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8221 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8222 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
8223
8224 // StartMBB:
8225 // # fall through to LoopMBB
8226 MBB->addSuccessor(LoopMBB);
8227
8228 DestBase = MachineOperand::CreateReg(NextDestReg, false);
8229 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
8230 if (EndMBB && !ImmLength)
8231 // If the loop handled the whole CLC range, DoneMBB will be empty with
8232 // CC live-through into EndMBB, so add it as live-in.
8233 DoneMBB->addLiveIn(SystemZ::CC);
8234 }
8235
8236 // LoopMBB:
8237 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
8238 // [ %NextDestReg, NextMBB ]
8239 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
8240 // [ %NextSrcReg, NextMBB ]
8241 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
8242 // [ %NextCountReg, NextMBB ]
8243 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
8244 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
8245 // ( JLH EndMBB )
8246 //
8247 // The prefetch is used only for MVC. The JLH is used only for CLC.
8248 MBB = LoopMBB;
8249 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
8250 .addReg(StartDestReg).addMBB(StartMBB)
8251 .addReg(NextDestReg).addMBB(NextMBB);
8252 if (!HaveSingleBase)
8253 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
8254 .addReg(StartSrcReg).addMBB(StartMBB)
8255 .addReg(NextSrcReg).addMBB(NextMBB);
8256 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
8257 .addReg(StartCountReg).addMBB(StartMBB)
8258 .addReg(NextCountReg).addMBB(NextMBB);
8259 if (Opcode == SystemZ::MVC)
8260 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
8261 .addImm(SystemZ::PFD_WRITE)
8262 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
8263 insertMemMemOp(MBB, MBB->end(),
8264 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
8265 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
8266 if (EndMBB) {
8267 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8268 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
8269 .addMBB(EndMBB);
8270 MBB->addSuccessor(EndMBB);
8271 MBB->addSuccessor(NextMBB);
8272 }
8273
8274 // NextMBB:
8275 // %NextDestReg = LA 256(%ThisDestReg)
8276 // %NextSrcReg = LA 256(%ThisSrcReg)
8277 // %NextCountReg = AGHI %ThisCountReg, -1
8278 // CGHI %NextCountReg, 0
8279 // JLH LoopMBB
8280 // # fall through to DoneMBB
8281 //
8282 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
8283 MBB = NextMBB;
8284 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
8285 .addReg(ThisDestReg).addImm(256).addReg(0);
8286 if (!HaveSingleBase)
8287 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
8288 .addReg(ThisSrcReg).addImm(256).addReg(0);
8289 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
8290 .addReg(ThisCountReg).addImm(-1);
8291 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8292 .addReg(NextCountReg).addImm(0);
8293 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8294 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
8295 .addMBB(LoopMBB);
8296 MBB->addSuccessor(LoopMBB);
8297 MBB->addSuccessor(DoneMBB);
8298
8299 MBB = DoneMBB;
8300 if (IsRegForm) {
8301 // DoneMBB:
8302 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
8303 // # Use EXecute Relative Long for the remainder of the bytes. The target
8304 // instruction of the EXRL will have a length field of 1 since 0 is an
8305 // illegal value. The number of bytes processed becomes (%LenAdjReg &
8306 // 0xff) + 1.
8307 // # Fall through to AllDoneMBB.
8308 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8309 Register RemDestReg = HaveSingleBase ? RemSrcReg
8310 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8311 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
8312 .addReg(StartDestReg).addMBB(StartMBB)
8313 .addReg(NextDestReg).addMBB(NextMBB);
8314 if (!HaveSingleBase)
8315 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
8316 .addReg(StartSrcReg).addMBB(StartMBB)
8317 .addReg(NextSrcReg).addMBB(NextMBB);
8318 if (IsMemset)
8319 insertMemMemOp(MBB, MBB->end(),
8320 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
8321 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
8322 MachineInstrBuilder EXRL_MIB =
8323 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
8324 .addImm(Opcode)
8325 .addReg(LenAdjReg)
8326 .addReg(RemDestReg).addImm(DestDisp)
8327 .addReg(RemSrcReg).addImm(SrcDisp);
8328 MBB->addSuccessor(AllDoneMBB);
8329 MBB = AllDoneMBB;
8330 if (EndMBB) {
8331 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
8332 MBB->addLiveIn(SystemZ::CC);
8333 }
8334 }
8335 }
8336
8337 // Handle any remaining bytes with straight-line code.
8338 while (ImmLength > 0) {
8339 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
8340 // The previous iteration might have created out-of-range displacements.
8341 // Apply them using LA/LAY if so.
8342 foldDisplIfNeeded(DestBase, DestDisp);
8343 foldDisplIfNeeded(SrcBase, SrcDisp);
8344 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
8345 DestDisp += ThisLength;
8346 SrcDisp += ThisLength;
8347 ImmLength -= ThisLength;
8348 // If there's another CLC to go, branch to the end if a difference
8349 // was found.
8350 if (EndMBB && ImmLength > 0) {
8351 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
8352 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8353 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
8354 .addMBB(EndMBB);
8355 MBB->addSuccessor(EndMBB);
8356 MBB->addSuccessor(NextMBB);
8357 MBB = NextMBB;
8358 }
8359 }
8360 if (EndMBB) {
8361 MBB->addSuccessor(EndMBB);
8362 MBB = EndMBB;
8363 MBB->addLiveIn(SystemZ::CC);
8364 }
8365
8366 MI.eraseFromParent();
8367 return MBB;
8368}
8369
8370// Decompose string pseudo-instruction MI into a loop that continually performs
8371// Opcode until CC != 3.
8372MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
8373 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
8374 MachineFunction &MF = *MBB->getParent();
8375 const SystemZInstrInfo *TII =
8376 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
8377 MachineRegisterInfo &MRI = MF.getRegInfo();
8378 DebugLoc DL = MI.getDebugLoc();
8379
8380 uint64_t End1Reg = MI.getOperand(0).getReg();
8381 uint64_t Start1Reg = MI.getOperand(1).getReg();
8382 uint64_t Start2Reg = MI.getOperand(2).getReg();
8383 uint64_t CharReg = MI.getOperand(3).getReg();
8384
8385 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
8386 uint64_t This1Reg = MRI.createVirtualRegister(RC);
8387 uint64_t This2Reg = MRI.createVirtualRegister(RC);
8388 uint64_t End2Reg = MRI.createVirtualRegister(RC);
8389
8390 MachineBasicBlock *StartMBB = MBB;
8391 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8392 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8393
8394 // StartMBB:
8395 // # fall through to LoopMBB
8396 MBB->addSuccessor(LoopMBB);
8397
8398 // LoopMBB:
8399 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
8400 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
8401 // R0L = %CharReg
8402 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
8403 // JO LoopMBB
8404 // # fall through to DoneMBB
8405 //
8406 // The load of R0L can be hoisted by post-RA LICM.
8407 MBB = LoopMBB;
8408
8409 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
8410 .addReg(Start1Reg).addMBB(StartMBB)
8411 .addReg(End1Reg).addMBB(LoopMBB);
8412 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
8413 .addReg(Start2Reg).addMBB(StartMBB)
8414 .addReg(End2Reg).addMBB(LoopMBB);
8415 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
8416 BuildMI(MBB, DL, TII->get(Opcode))
8417 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
8418 .addReg(This1Reg).addReg(This2Reg);
8419 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8420 .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
8421 MBB->addSuccessor(LoopMBB);
8422 MBB->addSuccessor(DoneMBB);
8423
8424 DoneMBB->addLiveIn(SystemZ::CC);
8425
8426 MI.eraseFromParent();
8427 return DoneMBB;
8428}
8429
8430// Update TBEGIN instruction with final opcode and register clobbers.
8431MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
8432 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
8433 bool NoFloat) const {
8434 MachineFunction &MF = *MBB->getParent();
8435 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
8436 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8437
8438 // Update opcode.
8439 MI.setDesc(TII->get(Opcode));
8440
8441 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
8442 // Make sure to add the corresponding GRSM bits if they are missing.
8443 uint64_t Control = MI.getOperand(2).getImm();
8444 static const unsigned GPRControlBit[16] = {
8445 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
8446 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
8447 };
8448 Control |= GPRControlBit[15];
8449 if (TFI->hasFP(MF))
8450 Control |= GPRControlBit[11];
8451 MI.getOperand(2).setImm(Control);
8452
8453 // Add GPR clobbers.
8454 for (int I = 0; I < 16; I++) {
8455 if ((Control & GPRControlBit[I]) == 0) {
8456 unsigned Reg = SystemZMC::GR64Regs[I];
8457 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
8458 }
8459 }
8460
8461 // Add FPR/VR clobbers.
8462 if (!NoFloat && (Control & 4) != 0) {
8463 if (Subtarget.hasVector()) {
8464 for (unsigned Reg : SystemZMC::VR128Regs) {
8465 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
8466 }
8467 } else {
8468 for (unsigned Reg : SystemZMC::FP64Regs) {
8469 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
8470 }
8471 }
8472 }
8473
8474 return MBB;
8475}
8476
8477MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
8478 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
8479 MachineFunction &MF = *MBB->getParent();
8480 MachineRegisterInfo *MRI = &MF.getRegInfo();
8481 const SystemZInstrInfo *TII =
8482 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
8483 DebugLoc DL = MI.getDebugLoc();
8484
8485 Register SrcReg = MI.getOperand(0).getReg();
8486
8487 // Create new virtual register of the same class as source.
8488 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
8489 Register DstReg = MRI->createVirtualRegister(RC);
8490
8491 // Replace pseudo with a normal load-and-test that models the def as
8492 // well.
8493 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
8494 .addReg(SrcReg)
8495 .setMIFlags(MI.getFlags());
8496 MI.eraseFromParent();
8497
8498 return MBB;
8499}
8500
8501MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
8502 MachineInstr &MI, MachineBasicBlock *MBB) const {
8503 MachineFunction &MF = *MBB->getParent();
8504 MachineRegisterInfo *MRI = &MF.getRegInfo();
8505 const SystemZInstrInfo *TII =
8506 static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
8507 DebugLoc DL = MI.getDebugLoc();
8508 const unsigned ProbeSize = getStackProbeSize(MF);
8509 Register DstReg = MI.getOperand(0).getReg();
8510 Register SizeReg = MI.getOperand(2).getReg();
8511
8512 MachineBasicBlock *StartMBB = MBB;
8513 MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
8514 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
8515 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
8516 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
8517 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
8518
8519 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
8520 MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
8521
8522 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8523 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8524
8525 // LoopTestMBB
8526 // BRC TailTestMBB
8527 // # fallthrough to LoopBodyMBB
8528 StartMBB->addSuccessor(LoopTestMBB);
8529 MBB = LoopTestMBB;
8530 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
8531 .addReg(SizeReg)
8532 .addMBB(StartMBB)
8533 .addReg(IncReg)
8534 .addMBB(LoopBodyMBB);
8535 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
8536 .addReg(PHIReg)
8537 .addImm(ProbeSize);
8538 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8539 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
8540 .addMBB(TailTestMBB);
8541 MBB->addSuccessor(LoopBodyMBB);
8542 MBB->addSuccessor(TailTestMBB);
8543
8544 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
8545 // J LoopTestMBB
8546 MBB = LoopBodyMBB;
8547 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
8548 .addReg(PHIReg)
8549 .addImm(ProbeSize);
8550 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
8551 .addReg(SystemZ::R15D)
8552 .addImm(ProbeSize);
8553 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
8554 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
8555 .setMemRefs(VolLdMMO);
8556 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
8557 MBB->addSuccessor(LoopTestMBB);
8558
8559 // TailTestMBB
8560 // BRC DoneMBB
8561 // # fallthrough to TailMBB
8562 MBB = TailTestMBB;
8563 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8564 .addReg(PHIReg)
8565 .addImm(0);
8566 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8567 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
8568 .addMBB(DoneMBB);
8569 MBB->addSuccessor(TailMBB);
8570 MBB->addSuccessor(DoneMBB);
8571
8572 // TailMBB
8573 // # fallthrough to DoneMBB
8574 MBB = TailMBB;
8575 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
8576 .addReg(SystemZ::R15D)
8577 .addReg(PHIReg);
8578 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
8579 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
8580 .setMemRefs(VolLdMMO);
8581 MBB->addSuccessor(DoneMBB);
8582
8583 // DoneMBB
8584 MBB = DoneMBB;
8585 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
8586 .addReg(SystemZ::R15D);
8587
8588 MI.eraseFromParent();
8589 return DoneMBB;
8590}
8591
8592SDValue SystemZTargetLowering::
8593getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
8594 MachineFunction &MF = DAG.getMachineFunction();
8595 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
8596 SDLoc DL(SP);
8597 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
8598 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
8599}
8600
8601MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
8602 MachineInstr &MI, MachineBasicBlock *MBB) const {
8603 switch (MI.getOpcode()) {
8604 case SystemZ::Select32:
8605 case SystemZ::Select64:
8606 case SystemZ::SelectF32:
8607 case SystemZ::SelectF64:
8608 case SystemZ::SelectF128:
8609 case SystemZ::SelectVR32:
8610 case SystemZ::SelectVR64:
8611 case SystemZ::SelectVR128:
8612 return emitSelect(MI, MBB);
8613
8614 case SystemZ::CondStore8Mux:
8615 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
8616 case SystemZ::CondStore8MuxInv:
8617 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
8618 case SystemZ::CondStore16Mux:
8619 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
8620 case SystemZ::CondStore16MuxInv:
8621 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
8622 case SystemZ::CondStore32Mux:
8623 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
8624 case SystemZ::CondStore32MuxInv:
8625 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
8626 case SystemZ::CondStore8:
8627 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
8628 case SystemZ::CondStore8Inv:
8629 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
8630 case SystemZ::CondStore16:
8631 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
8632 case SystemZ::CondStore16Inv:
8633 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
8634 case SystemZ::CondStore32:
8635 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
8636 case SystemZ::CondStore32Inv:
8637 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
8638 case SystemZ::CondStore64:
8639 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
8640 case SystemZ::CondStore64Inv:
8641 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
8642 case SystemZ::CondStoreF32:
8643 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
8644 case SystemZ::CondStoreF32Inv:
8645 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
8646 case SystemZ::CondStoreF64:
8647 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
8648 case SystemZ::CondStoreF64Inv:
8649 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
8650
8651 case SystemZ::PAIR128:
8652 return emitPair128(MI, MBB);
8653 case SystemZ::AEXT128:
8654 return emitExt128(MI, MBB, false);
8655 case SystemZ::ZEXT128:
8656 return emitExt128(MI, MBB, true);
8657
8658 case SystemZ::ATOMIC_SWAPW:
8659 return emitAtomicLoadBinary(MI, MBB, 0, 0);
8660 case SystemZ::ATOMIC_SWAP_32:
8661 return emitAtomicLoadBinary(MI, MBB, 0, 32);
8662 case SystemZ::ATOMIC_SWAP_64:
8663 return emitAtomicLoadBinary(MI, MBB, 0, 64);
8664
8665 case SystemZ::ATOMIC_LOADW_AR:
8666 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0);
8667 case SystemZ::ATOMIC_LOADW_AFI:
8668 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0);
8669 case SystemZ::ATOMIC_LOAD_AR:
8670 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32);
8671 case SystemZ::ATOMIC_LOAD_AHI:
8672 return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32);
8673 case SystemZ::ATOMIC_LOAD_AFI:
8674 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32);
8675 case SystemZ::ATOMIC_LOAD_AGR:
8676 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64);
8677 case SystemZ::ATOMIC_LOAD_AGHI:
8678 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64);
8679 case SystemZ::ATOMIC_LOAD_AGFI:
8680 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64);
8681
8682 case SystemZ::ATOMIC_LOADW_SR:
8683 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
8684 case SystemZ::ATOMIC_LOAD_SR:
8685 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
8686 case SystemZ::ATOMIC_LOAD_SGR:
8687 return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);
8688
8689 case SystemZ::ATOMIC_LOADW_NR:
8690 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
8691 case SystemZ::ATOMIC_LOADW_NILH:
8692 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
8693 case SystemZ::ATOMIC_LOAD_NR:
8694 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
8695 case SystemZ::ATOMIC_LOAD_NILL:
8696 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
8697 case SystemZ::ATOMIC_LOAD_NILH:
8698 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
8699 case SystemZ::ATOMIC_LOAD_NILF:
8700 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
8701 case SystemZ::ATOMIC_LOAD_NGR:
8702 return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
8703 case SystemZ::ATOMIC_LOAD_NILL64:
8704 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
8705 case SystemZ::ATOMIC_LOAD_NILH64:
8706 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
8707 case SystemZ::ATOMIC_LOAD_NIHL64:
8708 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
8709 case SystemZ::ATOMIC_LOAD_NIHH64:
8710 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
8711 case SystemZ::ATOMIC_LOAD_NILF64:
8712 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
8713 case SystemZ::ATOMIC_LOAD_NIHF64:
8714 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);
8715
8716 case SystemZ::ATOMIC_LOADW_OR:
8717 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
8718 case SystemZ::ATOMIC_LOADW_OILH:
8719 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
8720 case SystemZ::ATOMIC_LOAD_OR:
8721 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
8722 case SystemZ::ATOMIC_LOAD_OILL:
8723 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
8724 case SystemZ::ATOMIC_LOAD_OILH:
8725 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
8726 case SystemZ::ATOMIC_LOAD_OILF:
8727 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
8728 case SystemZ::ATOMIC_LOAD_OGR:
8729 return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
8730 case SystemZ::ATOMIC_LOAD_OILL64:
8731 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
8732 case SystemZ::ATOMIC_LOAD_OILH64:
8733 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
8734 case SystemZ::ATOMIC_LOAD_OIHL64:
8735 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
8736 case SystemZ::ATOMIC_LOAD_OIHH64:
8737 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
8738 case SystemZ::ATOMIC_LOAD_OILF64:
8739 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
8740 case SystemZ::ATOMIC_LOAD_OIHF64:
8741 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);
8742
8743 case SystemZ::ATOMIC_LOADW_XR:
8744 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
8745 case SystemZ::ATOMIC_LOADW_XILF:
8746 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
8747 case SystemZ::ATOMIC_LOAD_XR:
8748 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
8749 case SystemZ::ATOMIC_LOAD_XILF:
8750 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
8751 case SystemZ::ATOMIC_LOAD_XGR:
8752 return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
8753 case SystemZ::ATOMIC_LOAD_XILF64:
8754 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
8755 case SystemZ::ATOMIC_LOAD_XIHF64:
8756 return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);
8757
8758 case SystemZ::ATOMIC_LOADW_NRi:
8759 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
8760 case SystemZ::ATOMIC_LOADW_NILHi:
8761 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
8762 case SystemZ::ATOMIC_LOAD_NRi:
8763 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
8764 case SystemZ::ATOMIC_LOAD_NILLi:
8765 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
8766 case SystemZ::ATOMIC_LOAD_NILHi:
8767 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
8768 case SystemZ::ATOMIC_LOAD_NILFi:
8769 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
8770 case SystemZ::ATOMIC_LOAD_NGRi:
8771 return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
8772 case SystemZ::ATOMIC_LOAD_NILL64i:
8773 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
8774 case SystemZ::ATOMIC_LOAD_NILH64i:
8775 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
8776 case SystemZ::ATOMIC_LOAD_NIHL64i:
8777 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
8778 case SystemZ::ATOMIC_LOAD_NIHH64i:
8779 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
8780 case SystemZ::ATOMIC_LOAD_NILF64i:
8781 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
8782 case SystemZ::ATOMIC_LOAD_NIHF64i:
8783 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);
8784
8785 case SystemZ::ATOMIC_LOADW_MIN:
8786 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
8787 SystemZ::CCMASK_CMP_LE, 0);
8788 case SystemZ::ATOMIC_LOAD_MIN_32:
8789 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
8790 SystemZ::CCMASK_CMP_LE, 32);
8791 case SystemZ::ATOMIC_LOAD_MIN_64:
8792 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
8793 SystemZ::CCMASK_CMP_LE, 64);
8794
8795 case SystemZ::ATOMIC_LOADW_MAX:
8796 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
8797 SystemZ::CCMASK_CMP_GE, 0);
8798 case SystemZ::ATOMIC_LOAD_MAX_32:
8799 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
8800 SystemZ::CCMASK_CMP_GE, 32);
8801 case SystemZ::ATOMIC_LOAD_MAX_64:
8802 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
8803 SystemZ::CCMASK_CMP_GE, 64);
8804
8805 case SystemZ::ATOMIC_LOADW_UMIN:
8806 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
8807 SystemZ::CCMASK_CMP_LE, 0);
8808 case SystemZ::ATOMIC_LOAD_UMIN_32:
8809 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
8810 SystemZ::CCMASK_CMP_LE, 32);
8811 case SystemZ::ATOMIC_LOAD_UMIN_64:
8812 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
8813 SystemZ::CCMASK_CMP_LE, 64);
8814
8815 case SystemZ::ATOMIC_LOADW_UMAX:
8816 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
8817 SystemZ::CCMASK_CMP_GE, 0);
8818 case SystemZ::ATOMIC_LOAD_UMAX_32:
8819 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
8820 SystemZ::CCMASK_CMP_GE, 32);
8821 case SystemZ::ATOMIC_LOAD_UMAX_64:
8822 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
8823 SystemZ::CCMASK_CMP_GE, 64);
8824
8825 case SystemZ::ATOMIC_CMP_SWAPW:
8826 return emitAtomicCmpSwapW(MI, MBB);
8827 case SystemZ::MVCImm:
8828 case SystemZ::MVCReg:
8829 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
8830 case SystemZ::NCImm:
8831 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
8832 case SystemZ::OCImm:
8833 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
8834 case SystemZ::XCImm:
8835 case SystemZ::XCReg:
8836 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
8837 case SystemZ::CLCImm:
8838 case SystemZ::CLCReg:
8839 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
8840 case SystemZ::MemsetImmImm:
8841 case SystemZ::MemsetImmReg:
8842 case SystemZ::MemsetRegImm:
8843 case SystemZ::MemsetRegReg:
8844 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
8845 case SystemZ::CLSTLoop:
8846 return emitStringWrapper(MI, MBB, SystemZ::CLST);
8847 case SystemZ::MVSTLoop:
8848 return emitStringWrapper(MI, MBB, SystemZ::MVST);
8849 case SystemZ::SRSTLoop:
8850 return emitStringWrapper(MI, MBB, SystemZ::SRST);
8851 case SystemZ::TBEGIN:
8852 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
8853 case SystemZ::TBEGIN_nofloat:
8854 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
8855 case SystemZ::TBEGINC:
8856 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
8857 case SystemZ::LTEBRCompare_VecPseudo:
8858 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
8859 case SystemZ::LTDBRCompare_VecPseudo:
8860 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
8861 case SystemZ::LTXBRCompare_VecPseudo:
8862 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
8863
8864 case SystemZ::PROBED_ALLOCA:
8865 return emitProbedAlloca(MI, MBB);
8866
8867 case TargetOpcode::STACKMAP:
8868 case TargetOpcode::PATCHPOINT:
8869 return emitPatchPoint(MI, MBB);
8870
8871 default:
8872 llvm_unreachable("Unexpected instr type to insert");
8873 }
8874}
8875
8876// This is only used by the isel schedulers, and is needed only to prevent
8877// the compiler from crashing when list-ilp is used.
8878const TargetRegisterClass *
8879SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
8880 if (VT == MVT::Untyped)
8881 return &SystemZ::ADDR128BitRegClass;
8882 return TargetLowering::getRepRegClassFor(VT);
8883}

/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15
16#include "llvm/Support/Compiler.h"
17#include <cassert>
18#include <climits>
19#include <cmath>
20#include <cstdint>
21#include <cstring>
22#include <limits>
23#include <type_traits>
24
25#ifdef __ANDROID_NDK__
26#include <android/api-level.h>
27#endif
28
29#ifdef _MSC_VER
30// Declare these intrinsics manually rather than including intrin.h. It's very
31// expensive, and MathExtras.h is popular.
32// #include <intrin.h>
33extern "C" {
34unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
35unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
36unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
37unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
38}
39#endif
40
41namespace llvm {
42
43/// The behavior an operation has on an input of 0.
44enum ZeroBehavior {
45 /// The returned value is undefined.
46 ZB_Undefined,
47 /// The returned value is numeric_limits<T>::max()
48 ZB_Max,
49 /// The returned value is numeric_limits<T>::digits
50 ZB_Width
51};
52
53/// Mathematical constants.
54namespace numbers {
55// TODO: Track C++20 std::numbers.
56// TODO: Favor using the hexadecimal FP constants (requires C++17).
57constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113
58 egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
59 ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
60 ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392
61 log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0)
62 log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2)
63 pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796
64 inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541
65 sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
66 inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
67 sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A00219
68 inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1)
69 sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
70 inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1)
71 phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622
72constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113
73 egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620
74 ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162
75 ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392
76 log2ef = 1.44269504F, // (0x1.715476P+0)
77 log10ef = .434294482F, // (0x1.bcb7b2P-2)
78 pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796
79 inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541
80 sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161
81 inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197
82 sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193
83 inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1)
84 sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194
85 inv_sqrt3f = .577350269F, // (0x1.279a74P-1)
86 phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
87} // namespace numbers
88
89namespace detail {
90template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
91 static unsigned count(T Val, ZeroBehavior) {
92 if (!Val)
93 return std::numeric_limits<T>::digits;
94 if (Val & 0x1)
95 return 0;
96
97 // Bisection method.
98 unsigned ZeroBits = 0;
99 T Shift = std::numeric_limits<T>::digits >> 1;
100 T Mask = std::numeric_limits<T>::max() >> Shift;
101 while (Shift) {
102 if ((Val & Mask) == 0) {
103 Val >>= Shift;
104 ZeroBits |= Shift;
105 }
106 Shift >>= 1;
107 Mask >>= Shift;
108 }
109 return ZeroBits;
110 }
111};
112
113#if defined(__GNUC__) || defined(_MSC_VER)
114template <typename T> struct TrailingZerosCounter<T, 4> {
115 static unsigned count(T Val, ZeroBehavior ZB) {
116 if (ZB != ZB_Undefined && Val == 0)
117 return 32;
118
119#if __has_builtin(__builtin_ctz) || defined(__GNUC__)
120 return __builtin_ctz(Val);
121#elif defined(_MSC_VER)
122 unsigned long Index;
123 _BitScanForward(&Index, Val);
124 return Index;
125#endif
126 }
127};
128
129#if !defined(_MSC_VER) || defined(_M_X64)
130template <typename T> struct TrailingZerosCounter<T, 8> {
131 static unsigned count(T Val, ZeroBehavior ZB) {
132 if (ZB != ZB_Undefined && Val == 0)
133 return 64;
134
135#if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
136 return __builtin_ctzll(Val);
137#elif defined(_MSC_VER)
138 unsigned long Index;
139 _BitScanForward64(&Index, Val);
140 return Index;
141#endif
142 }
143};
144#endif
145#endif
146} // namespace detail
147
148/// Count the number of 0's from the least significant bit to the most
149/// significant bit, stopping at the first 1.
150///
151/// Only unsigned integral types are allowed.
152///
153/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
154/// valid arguments.
155template <typename T>
156unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
157 static_assert(std::numeric_limits<T>::is_integer &&
158 !std::numeric_limits<T>::is_signed,
159 "Only unsigned integral types are allowed.");
160 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
161}
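A small usage sketch of countTrailingZeros as defined above (the input values are chosen purely for illustration):

  #include <cassert>
  #include <cstdint>
  #include "llvm/Support/MathExtras.h"

  void countTrailingZerosExample() {
    // Three zero bits sit below the lowest set bit of 0b1000.
    assert(llvm::countTrailingZeros(uint32_t(0x8)) == 3);
    // With the default ZB_Width behavior, a zero input yields the full width.
    assert(llvm::countTrailingZeros(uint32_t(0)) == 32);
  }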
162
163namespace detail {
164template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
165 static unsigned count(T Val, ZeroBehavior) {
166 if (!Val)
167 return std::numeric_limits<T>::digits;
168
169 // Bisection method.
170 unsigned ZeroBits = 0;
171 for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
172 T Tmp = Val >> Shift;
173 if (Tmp)
174 Val = Tmp;
175 else
176 ZeroBits |= Shift;
177 }
178 return ZeroBits;
179 }
180};
181
182#if defined(__GNUC__) || defined(_MSC_VER)
183template <typename T> struct LeadingZerosCounter<T, 4> {
184 static unsigned count(T Val, ZeroBehavior ZB) {
185 if (ZB != ZB_Undefined && Val == 0)
186 return 32;
187
188#if __has_builtin(__builtin_clz) || defined(__GNUC__)
189 return __builtin_clz(Val);
190#elif defined(_MSC_VER)
191 unsigned long Index;
192 _BitScanReverse(&Index, Val);
193 return Index ^ 31;
194#endif
195 }
196};
197
198#if !defined(_MSC_VER) || defined(_M_X64)
199template <typename T> struct LeadingZerosCounter<T, 8> {
200 static unsigned count(T Val, ZeroBehavior ZB) {
201 if (ZB != ZB_Undefined && Val == 0)
202 return 64;
203
204#if __has_builtin(__builtin_clzll) || defined(__GNUC__)
205 return __builtin_clzll(Val);
206#elif defined(_MSC_VER)
207 unsigned long Index;
208 _BitScanReverse64(&Index, Val);
209 return Index ^ 63;
210#endif
211 }
212};
213#endif
214#endif
215} // namespace detail
216
217/// Count the number of 0's from the most significant bit to the least
218/// significant bit, stopping at the first 1.
219///
220/// Only unsigned integral types are allowed.
221///
222/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
223/// valid arguments.
224template <typename T>
225unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
226 static_assert(std::numeric_limits<T>::is_integer &&
227 !std::numeric_limits<T>::is_signed,
228 "Only unsigned integral types are allowed.");
229 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
230}
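Likewise for countLeadingZeros, a brief illustrative check (example values are hypothetical; same includes as the sketch above):

  void countLeadingZerosExample() {
    assert(llvm::countLeadingZeros(uint32_t(0x0000FFFF)) == 16); // top 16 bits clear
    assert(llvm::countLeadingZeros(uint64_t(1)) == 63);          // only bit 0 set
  }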
231
232/// Get the index of the first set bit starting from the least
233/// significant bit.
234///
235/// Only unsigned integral types are allowed.
236///
237/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
238/// valid arguments.
239template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
240 if (ZB == ZB_Max && Val == 0)
13.1: 'ZB' is equal to ZB_Max
14: Assuming 'Val' is equal to 0
15: Taking true branch
241 return std::numeric_limits<T>::max();
16: Calling 'numeric_limits::max'
18: Returning from 'numeric_limits::max'
19: Returning the value 18446744073709551615
242
243 return countTrailingZeros(Val, ZB_Undefined);
244}
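The annotated path above is the heart of this report: with the default ZB_Max behavior, findFirstSet returns std::numeric_limits<T>::max() for a zero input rather than a bit index. A minimal sketch of how such a sentinel can become an out-of-range shift amount; the caller below is hypothetical and is not the actual code at the warning site:

  uint64_t lowestSetBitOnly(uint64_t Mask) {
    // If Mask == 0, Index becomes 18446744073709551615 and the shift below is
    // undefined behavior, because the amount is >= the width of uint64_t.
    uint64_t Index = llvm::findFirstSet(Mask);
    return uint64_t(1) << Index;
  }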
245
246/// Create a bitmask with the N right-most bits set to 1, and all other
247/// bits set to 0. Only unsigned types are allowed.
248template <typename T> T maskTrailingOnes(unsigned N) {
249 static_assert(std::is_unsigned<T>::value, "Invalid type!");
250 const unsigned Bits = CHAR_BIT * sizeof(T);
251 assert(N <= Bits && "Invalid bit index");
252 return N == 0 ? 0 : (T(-1) >> (Bits - N));
253}
254
255/// Create a bitmask with the N left-most bits set to 1, and all other
256/// bits set to 0. Only unsigned types are allowed.
257template <typename T> T maskLeadingOnes(unsigned N) {
258 return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
259}
260
261/// Create a bitmask with the N right-most bits set to 0, and all other
262/// bits set to 1. Only unsigned types are allowed.
263template <typename T> T maskTrailingZeros(unsigned N) {
264 return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
265}
266
267/// Create a bitmask with the N left-most bits set to 0, and all other
268/// bits set to 1. Only unsigned types are allowed.
269template <typename T> T maskLeadingZeros(unsigned N) {
270 return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
271}
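The four mask builders above compose as follows (illustrative values, same includes as the first sketch):

  void maskExamples() {
    assert(llvm::maskTrailingOnes<uint32_t>(8)  == 0x000000FFu);
    assert(llvm::maskLeadingOnes<uint32_t>(8)   == 0xFF000000u);
    assert(llvm::maskTrailingZeros<uint32_t>(8) == 0xFFFFFF00u);
    assert(llvm::maskTrailingOnes<uint32_t>(0)  == 0u); // N == 0 is handled explicitly
  }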
272
273/// Get the index of the last set bit starting from the least
274/// significant bit.
275///
276/// Only unsigned integral types are allowed.
277///
278/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
279/// valid arguments.
280template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
281 if (ZB == ZB_Max && Val == 0)
282 return std::numeric_limits<T>::max();
283
284 // Use ^ instead of - because both gcc and llvm can remove the associated ^
285 // in the __builtin_clz intrinsic on x86.
286 return countLeadingZeros(Val, ZB_Undefined) ^
287 (std::numeric_limits<T>::digits - 1);
288}
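findLastSet carries the same ZB_Max caveat as findFirstSet; for non-zero inputs it returns the index of the highest set bit (illustrative check):

  void findLastSetExample() {
    assert(llvm::findLastSet(uint32_t(0x10)) == 4);       // 0x10 == 1 << 4
    assert(llvm::findLastSet(uint32_t(0)) == UINT32_MAX); // ZB_Max sentinel
  }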
289
290/// Macro compressed bit reversal table for 256 bits.
291///
292/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
293static const unsigned char BitReverseTable256[256] = {
294#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
295#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
296#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
297 R6(0), R6(2), R6(1), R6(3)
298#undef R2
299#undef R4
300#undef R6
301};
302
303/// Reverse the bits in \p Val.
304template <typename T>
305T reverseBits(T Val) {
306 unsigned char in[sizeof(Val)];
307 unsigned char out[sizeof(Val)];
308 std::memcpy(in, &Val, sizeof(Val));
309 for (unsigned i = 0; i < sizeof(Val); ++i)
310 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
311 std::memcpy(&Val, out, sizeof(Val));
312 return Val;
313}
314
315#if __has_builtin(__builtin_bitreverse8)
316template<>
317inline uint8_t reverseBits<uint8_t>(uint8_t Val) {
318 return __builtin_bitreverse8(Val);
319}
320#endif
321
322#if __has_builtin(__builtin_bitreverse16)
323template<>
324inline uint16_t reverseBits<uint16_t>(uint16_t Val) {
325 return __builtin_bitreverse16(Val);
326}
327#endif
328
329#if __has_builtin(__builtin_bitreverse32)
330template<>
331inline uint32_t reverseBits<uint32_t>(uint32_t Val) {
332 return __builtin_bitreverse32(Val);
333}
334#endif
335
336#if __has_builtin(__builtin_bitreverse64)
337template<>
338inline uint64_t reverseBits<uint64_t>(uint64_t Val) {
339 return __builtin_bitreverse64(Val);
340}
341#endif
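A quick check of reverseBits on the small types, whether the builtin specializations or the table-based generic version is used (values for illustration only):

  void reverseBitsExample() {
    assert(llvm::reverseBits<uint8_t>(0x01) == 0x80);
    assert(llvm::reverseBits<uint16_t>(0x00FF) == 0xFF00);
  }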
342
343// NOTE: The following support functions use the _32/_64 extensions instead of
344// type overloading so that signed and unsigned integers can be used without
345// ambiguity.
346
347/// Return the high 32 bits of a 64 bit value.
348constexpr inline uint32_t Hi_32(uint64_t Value) {
349 return static_cast<uint32_t>(Value >> 32);
350}
351
352/// Return the low 32 bits of a 64 bit value.
353constexpr inline uint32_t Lo_32(uint64_t Value) {
354 return static_cast<uint32_t>(Value);
355}
356
357/// Make a 64-bit integer from a high / low pair of 32-bit integers.
358constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
359 return ((uint64_t)High << 32) | (uint64_t)Low;
360}
361
362/// Checks if an integer fits into the given bit width.
363template <unsigned N> constexpr inline bool isInt(int64_t x) {
364 return N >= 64 || (-(INT64_C(1) << (N - 1)) <= x && x < (INT64_C(1) << (N - 1)));
365}
366// Template specializations to get better code for common cases.
367template <> constexpr inline bool isInt<8>(int64_t x) {
368 return static_cast<int8_t>(x) == x;
369}
370template <> constexpr inline bool isInt<16>(int64_t x) {
371 return static_cast<int16_t>(x) == x;
372}
373template <> constexpr inline bool isInt<32>(int64_t x) {
374 return static_cast<int32_t>(x) == x;
375}
376
377/// Checks if a signed integer is an N bit number shifted left by S.
378template <unsigned N, unsigned S>
379constexpr inline bool isShiftedInt(int64_t x) {
380 static_assert(
381 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number.");
382 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
383 return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
384}
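Both isInt and isShiftedInt are constexpr, so they can be exercised at compile time (example values are hypothetical):

  static_assert(llvm::isInt<8>(127), "largest value that fits in 8 signed bits");
  static_assert(!llvm::isInt<8>(128), "one past the signed 8-bit range");
  static_assert(llvm::isShiftedInt<4, 2>(28), "28 == 7 << 2");
  static_assert(!llvm::isShiftedInt<4, 2>(30), "30 is not a multiple of 1 << 2");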
385
386/// Checks if an unsigned integer fits into the given bit width.
387///
388/// This is written as two functions rather than as simply
389///
390/// return N >= 64 || X < (UINT64_C(1) << N);
391///
392/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
393/// left too many places.
394template <unsigned N>
395constexpr inline std::enable_if_t<(N < 64), bool> isUInt(uint64_t X) {
396 static_assert(N > 0, "isUInt<0> doesn't make sense");
397 return X < (UINT64_C(1) << (N));
398}
399template <unsigned N>
400constexpr inline std::enable_if_t<N >= 64, bool> isUInt(uint64_t) {
401 return true;
402}
403
404// Template specializations to get better code for common cases.
405template <> constexpr inline bool isUInt<8>(uint64_t x) {
406 return static_cast<uint8_t>(x) == x;
407}
408template <> constexpr inline bool isUInt<16>(uint64_t x) {
409 return static_cast<uint16_t>(x) == x;
410}
411template <> constexpr inline bool isUInt<32>(uint64_t x) {
412 return static_cast<uint32_t>(x) == x;
413}
414
415/// Checks if an unsigned integer is an N bit number shifted left by S.
416template <unsigned N, unsigned S>
417constexpr inline bool isShiftedUInt(uint64_t x) {
418 static_assert(
419 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
420 static_assert(N + S <= 64,
421 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
422 // Per the two static_asserts above, S must be strictly less than 64. So
423 // 1 << S is not undefined behavior.
424 return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
425}
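isUInt<12> is the same predicate that foldDisplIfNeeded uses in the SystemZ code above to decide whether a displacement still fits the short 12-bit addressing form; a compile-time sketch with illustrative values:

  static_assert(llvm::isUInt<12>(4095), "largest 12-bit displacement");
  static_assert(!llvm::isUInt<12>(4096), "one past the 12-bit range");
  static_assert(llvm::isShiftedUInt<8, 4>(0xFF0), "an 8-bit value shifted left by 4");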
426
427/// Gets the maximum value for a N-bit unsigned integer.
428inline uint64_t maxUIntN(uint64_t N) {
429 assert(N > 0 && N <= 64 && "integer width out of range");
430
431 // uint64_t(1) << 64 is undefined behavior, so we can't do
432 // (uint64_t(1) << N) - 1
433 // without checking first that N != 64. But this works and doesn't have a
434 // branch.
435 return UINT64_MAX >> (64 - N);
436}
437
438/// Gets the minimum value for a N-bit signed integer.
439inline int64_t minIntN(int64_t N) {
440 assert(N > 0 && N <= 64 && "integer width out of range");
441
442 return UINT64_C(1) + ~(UINT64_C(1) << (N - 1));
443}
444
445/// Gets the maximum value for a N-bit signed integer.
446inline int64_t maxIntN(int64_t N) {
447 assert(N > 0 && N <= 64 && "integer width out of range");
448
449 // This relies on two's complement wraparound when N == 64, so we convert to
450 // int64_t only at the very end to avoid UB.
451 return (UINT64_C(1) << (N - 1)) - 1;
452}
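Runtime checks of the N-bit range helpers (illustrative values, same includes as the first sketch):

  void rangeHelpersExample() {
    assert(llvm::maxUIntN(12) == 4095);
    assert(llvm::minIntN(8) == -128);
    assert(llvm::maxIntN(8) == 127);
  }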
453
454/// Checks if an unsigned integer fits into the given (dynamic) bit width.
455inline bool isUIntN(unsigned N, uint64_t x) {
456 return N >= 64 || x <= maxUIntN(N);
457}
458
459/// Checks if a signed integer fits into the given (dynamic) bit width.
460inline bool isIntN(unsigned N, int64_t x) {
461 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
462}
463
464/// Return true if the argument is a non-empty sequence of ones starting at the
465/// least significant bit with the remainder zero (32 bit version).
466/// Ex. isMask_32(0x0000FFFFU) == true.
467constexpr inline bool isMask_32(uint32_t Value) {
468 return Value && ((Value + 1) & Value) == 0;
469}
470
471/// Return true if the argument is a non-empty sequence of ones starting at the
472/// least significant bit with the remainder zero (64 bit version).
473constexpr inline bool isMask_64(uint64_t Value) {
474 return Value && ((Value + 1) & Value) == 0;
475}
476
477/// Return true if the argument contains a non-empty sequence of ones with the
478/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
479constexpr inline bool isShiftedMask_32(uint32_t Value) {
480 return Value && isMask_32((Value - 1) | Value);
481}
482
483/// Return true if the argument contains a non-empty sequence of ones with the
484/// remainder zero (64 bit version.)
485constexpr inline bool isShiftedMask_64(uint64_t Value) {
486 return Value && isMask_64((Value - 1) | Value);
487}
488
489/// Return true if the argument is a power of two > 0.
490/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
491constexpr inline bool isPowerOf2_32(uint32_t Value) {
492 return Value && !(Value & (Value - 1));
493}
494
495/// Return true if the argument is a power of two > 0 (64 bit edition.)
496constexpr inline bool isPowerOf2_64(uint64_t Value) {
497 return Value && !(Value & (Value - 1));
498}
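The mask and power-of-two predicates are also constexpr; a brief compile-time sketch with hypothetical values:

  static_assert(llvm::isMask_32(0x0000FFFFu), "ones starting at bit 0");
  static_assert(!llvm::isMask_32(0x0000FF00u), "gap below the ones");
  static_assert(llvm::isShiftedMask_32(0x0000FF00u), "contiguous ones anywhere");
  static_assert(llvm::isPowerOf2_32(0x00100000u), "exactly one bit set");
  static_assert(!llvm::isPowerOf2_64(0), "zero is not a power of two");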
499
500/// Count the number of ones from the most significant bit to the first
501/// zero bit.
502///
503/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
504/// Only unsigned integral types are allowed.
505///
506/// \param ZB the behavior on an input of all ones. Only ZB_Width and
507/// ZB_Undefined are valid arguments.
508template <typename T>
509unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
510 static_assert(std::numeric_limits<T>::is_integer &&
511 !std::numeric_limits<T>::is_signed,
512 "Only unsigned integral types are allowed.");
513 return countLeadingZeros<T>(~Value, ZB);
514}
515
516/// Count the number of ones from the least significant bit to the first
517/// zero bit.
518///
519/// Ex. countTrailingOnes(0x00FF00FF) == 8.
520/// Only unsigned integral types are allowed.
521///
522/// \param ZB the behavior on an input of all ones. Only ZB_Width and
523/// ZB_Undefined are valid arguments.
524template <typename T>
525unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
526 static_assert(std::numeric_limits<T>::is_integer &&
527 !std::numeric_limits<T>::is_signed,
528 "Only unsigned integral types are allowed.");
529 return countTrailingZeros<T>(~Value, ZB);
530}
531
532namespace detail {
533template <typename T, std::size_t SizeOfT> struct PopulationCounter {
534 static unsigned count(T Value) {
535 // Generic version, forward to 32 bits.
536 static_assert(SizeOfT <= 4, "Not implemented!");
537#if defined(__GNUC__)
538 return __builtin_popcount(Value);
539#else
540 uint32_t v = Value;
541 v = v - ((v >> 1) & 0x55555555);
542 v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
543 return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
544#endif
545 }
546};
547
548template <typename T> struct PopulationCounter<T, 8> {
549 static unsigned count(T Value) {
550#if defined(__GNUC__)
551 return __builtin_popcountll(Value);
552#else
553 uint64_t v = Value;
554 v = v - ((v >> 1) & 0x5555555555555555ULL);
555 v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
556 v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
557 return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
558#endif
559 }
560};
561} // namespace detail
562
563/// Count the number of set bits in a value.
564/// Ex. countPopulation(0xF000F000) = 8
565/// Returns 0 if the word is zero.
566template <typename T>
567inline unsigned countPopulation(T Value) {
568 static_assert(std::numeric_limits<T>::is_integer &&
569 !std::numeric_limits<T>::is_signed,
570 "Only unsigned integral types are allowed.");
571 return detail::PopulationCounter<T, sizeof(T)>::count(Value);
572}
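Matching the example in the comment above (runtime check, same includes as the first sketch):

  void countPopulationExample() {
    assert(llvm::countPopulation(uint32_t(0xF000F000)) == 8);
  }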
573
574/// Return true if the argument contains a non-empty sequence of ones with the
575/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
576/// If true, \p MaskIdx will specify the index of the lowest set bit and \p
577/// MaskLen is updated to specify the length of the mask, else neither are
578/// updated.
579inline bool isShiftedMask_32(uint32_t Value, unsigned &MaskIdx,
580 unsigned &MaskLen) {
581 if (!isShiftedMask_32(Value))
582 return false;
583 MaskIdx = countTrailingZeros(Value);
584 MaskLen = countPopulation(Value);
585 return true;
586}
587
588/// Return true if the argument contains a non-empty sequence of ones with the
589/// remainder zero (64 bit version.) If true, \p MaskIdx will specify the index
590/// of the lowest set bit and \p MaskLen is updated to specify the length of the
591/// mask, else neither are updated.
592inline bool isShiftedMask_64(uint64_t Value, unsigned &MaskIdx,
593 unsigned &MaskLen) {
594 if (!isShiftedMask_64(Value))
595 return false;
596 MaskIdx = countTrailingZeros(Value);
597 MaskLen = countPopulation(Value);
598 return true;
599}
600
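Illustrative sketch of the two-output overloads above (the single-argument isShiftedMask_32/64 predicates are defined earlier in this header):

  unsigned Idx, Len;
  if (llvm::isShiftedMask_32(0x0000FF00u, Idx, Len)) {
    // Idx == 8 (index of the lowest set bit), Len == 8 (population count of the mask).
  }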
601/// Compile time Log2.
602/// Valid only for positive powers of two.
603template <size_t kValue> constexpr inline size_t CTLog2() {
604 static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue),
605 "Value is not a valid power of 2");
606 return 1 + CTLog2<kValue / 2>();
607}
608
609template <> constexpr inline size_t CTLog2<1>() { return 0; }
610
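CTLog2 recurses at compile time, halving the value until the CTLog2<1> base case; for example (illustrative only):

  static_assert(llvm::CTLog2<8>() == 3, "8 == 1 << 3");
  static_assert(llvm::CTLog2<1>() == 0, "base case");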
611/// Return the log base 2 of the specified value.
612inline double Log2(double Value) {
613#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
614 return __builtin_log(Value) / __builtin_log(2.0);
615#else
616 return log2(Value);
617#endif
618}
619
620/// Return the floor log base 2 of the specified value, -1 if the value is zero.
621/// (32 bit edition.)
622/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
623inline unsigned Log2_32(uint32_t Value) {
624 return 31 - countLeadingZeros(Value);
625}
626
627/// Return the floor log base 2 of the specified value, -1 if the value is zero.
628/// (64 bit edition.)
629inline unsigned Log2_64(uint64_t Value) {
630 return 63 - countLeadingZeros(Value);
631}
632
633/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
634/// (32 bit edition).
635/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
636inline unsigned Log2_32_Ceil(uint32_t Value) {
637 return 32 - countLeadingZeros(Value - 1);
638}
639
640/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
641/// (64 bit edition.)
642inline unsigned Log2_64_Ceil(uint64_t Value) {
643 return 64 - countLeadingZeros(Value - 1);
644}
645
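The floor and ceiling variants only differ for values that are not powers of two; a quick comparison (illustrative sketch):

  unsigned Floor = llvm::Log2_32(6);     // 2
  unsigned Ceil = llvm::Log2_32_Ceil(6); // 3
  // For exact powers of two both agree: Log2_32(32) == Log2_32_Ceil(32) == 5.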
646/// Return the greatest common divisor of the values using Euclid's algorithm.
647template <typename T>
648inline T greatestCommonDivisor(T A, T B) {
649 while (B) {
650 T Tmp = B;
651 B = A % B;
652 A = Tmp;
653 }
654 return A;
655}
656
657inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
658 return greatestCommonDivisor<uint64_t>(A, B);
659}
660
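Euclid's algorithm as written above, traced on one pair of inputs (illustrative only):

  // greatestCommonDivisor<uint64_t>(48, 18):
  //   (A, B) = (48, 18) -> (18, 12) -> (12, 6) -> (6, 0), so the result is 6.
  uint64_t G = llvm::GreatestCommonDivisor64(48, 18); // G == 6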
661/// This function takes a 64-bit integer and returns the bit equivalent double.
662inline double BitsToDouble(uint64_t Bits) {
663 double D;
664 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
665 memcpy(&D, &Bits, sizeof(Bits));
666 return D;
667}
668
669/// This function takes a 32-bit integer and returns the bit equivalent float.
670inline float BitsToFloat(uint32_t Bits) {
671 float F;
672 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
673 memcpy(&F, &Bits, sizeof(Bits));
674 return F;
675}
676
677/// This function takes a double and returns the bit equivalent 64-bit integer.
678/// Note that copying doubles around changes the bits of NaNs on some hosts,
679/// notably x86, so this routine cannot be used if these bits are needed.
680inline uint64_t DoubleToBits(double Double) {
681 uint64_t Bits;
682 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
683 memcpy(&Bits, &Double, sizeof(Double));
684 return Bits;
685}
686
687/// This function takes a float and returns the bit equivalent 32-bit integer.
688/// Note that copying floats around changes the bits of NaNs on some hosts,
689/// notably x86, so this routine cannot be used if these bits are needed.
690inline uint32_t FloatToBits(float Float) {
691 uint32_t Bits;
692 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
693 memcpy(&Bits, &Float, sizeof(Float));
694 return Bits;
695}
696
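The four conversions above are memcpy-based bit casts, which avoids the undefined behaviour of type punning through unions or pointer casts. A round-trip sketch (illustrative, assuming an IEEE-754 host):

  uint32_t Bits = llvm::FloatToBits(1.0f); // 0x3F800000 on IEEE-754 hosts
  float F = llvm::BitsToFloat(Bits);       // 1.0f again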
697/// A and B are either alignments or offsets. Return the minimum alignment that
698/// may be assumed after adding the two together.
699constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
700 // The largest power of 2 that divides both A and B.
701 //
702 // Replace "-Value" by "1+~Value" in the following commented code to avoid
703 // MSVC warning C4146
704 // return (A | B) & -(A | B);
705 return (A | B) & (1 + ~(A | B));
706}
707
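MinAlign isolates the lowest set bit of (A | B), i.e. the largest power of two dividing both arguments; since it is constexpr it can be checked at compile time (illustrative sketch):

  static_assert(llvm::MinAlign(16, 24) == 8, "largest common power-of-two factor");
  static_assert(llvm::MinAlign(8, 4) == 4, "");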
708/// Returns the next power of two (in 64-bits) that is strictly greater than A.
709/// Returns zero on overflow.
710constexpr inline uint64_t NextPowerOf2(uint64_t A) {
711 A |= (A >> 1);
712 A |= (A >> 2);
713 A |= (A >> 4);
714 A |= (A >> 8);
715 A |= (A >> 16);
716 A |= (A >> 32);
717 return A + 1;
718}
719
720/// Returns the power of two which is less than or equal to the given value.
721/// Essentially, it is a floor operation across the domain of powers of two.
722inline uint64_t PowerOf2Floor(uint64_t A) {
723 if (!A) return 0;
724 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
725}
726
727/// Returns the power of two which is greater than or equal to the given value.
728/// Essentially, it is a ceil operation across the domain of powers of two.
729inline uint64_t PowerOf2Ceil(uint64_t A) {
730 if (!A)
731 return 0;
732 return NextPowerOf2(A - 1);
733}
734
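NextPowerOf2 is strictly greater than its argument, while PowerOf2Ceil and PowerOf2Floor are inclusive; the difference shows up exactly at powers of two (illustrative sketch):

  uint64_t A = llvm::NextPowerOf2(16);  // 32 (strictly greater)
  uint64_t B = llvm::PowerOf2Ceil(16);  // 16 (already a power of two)
  uint64_t C = llvm::PowerOf2Floor(17); // 16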
735/// Returns the next integer (mod 2**64) that is greater than or equal to
736/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
737///
738/// If non-zero \p Skew is specified, the return value will be a minimal
739/// integer that is greater than or equal to \p Value and equal to
740/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
741/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
742///
743/// Examples:
744/// \code
745/// alignTo(5, 8) = 8
746/// alignTo(17, 8) = 24
747/// alignTo(~0LL, 8) = 0
748/// alignTo(321, 255) = 510
749///
750/// alignTo(5, 8, 7) = 7
751/// alignTo(17, 8, 1) = 17
752/// alignTo(~0LL, 8, 3) = 3
753/// alignTo(321, 255, 42) = 552
754/// \endcode
755inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
756 assert(Align != 0u && "Align can't be 0.");
757 Skew %= Align;
758 return (Value + Align - 1 - Skew) / Align * Align + Skew;
759}
760
761/// Returns the next integer (mod 2**64) that is greater than or equal to
762/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
763template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
764 static_assert(Align != 0u, "Align must be non-zero");
765 return (Value + Align - 1) / Align * Align;
766}
767
768/// Returns the integer ceil(Numerator / Denominator).
769inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
770 return alignTo(Numerator, Denominator) / Denominator;
771}
772
773/// Returns the integer nearest(Numerator / Denominator).
774inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
775 return (Numerator + (Denominator / 2)) / Denominator;
776}
777
778/// Returns the largest uint64_t less than or equal to \p Value that is
779/// \p Skew mod \p Align. \p Align must be non-zero.
780inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
781 assert(Align != 0u && "Align can't be 0.");
782 Skew %= Align;
783 return (Value - Skew) / Align * Align + Skew;
784}
785
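alignTo rounds up and alignDown rounds down to a multiple of Align, optionally offset by Skew; a sketch matching the examples in the alignTo comment above (illustrative only):

  uint64_t Up = llvm::alignTo(17, 8);        // 24
  uint64_t Down = llvm::alignDown(17, 8);    // 16
  uint64_t Skewed = llvm::alignTo(17, 8, 1); // 17, since 17 == 8 * 2 + 1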
786/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
787/// Requires 0 < B <= 32.
788template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
789 static_assert(B > 0, "Bit width can't be 0.");
790 static_assert(B <= 32, "Bit width out of range.");
791 return int32_t(X << (32 - B)) >> (32 - B);
792}
793
794/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
795/// Requires 0 < B <= 32.
796inline int32_t SignExtend32(uint32_t X, unsigned B) {
797 assert(B > 0 && "Bit width can't be 0.");
798 assert(B <= 32 && "Bit width out of range.");
799 return int32_t(X << (32 - B)) >> (32 - B);
800}
801
802/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
803/// Requires 0 < B <= 64.
804template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
805 static_assert(B > 0, "Bit width can't be 0.");
806 static_assert(B <= 64, "Bit width out of range.");
807 return int64_t(x << (64 - B)) >> (64 - B);
808}
809
810/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
811/// Requires 0 < B <= 64.
812inline int64_t SignExtend64(uint64_t X, unsigned B) {
813 assert(B > 0 && "Bit width can't be 0.");
814 assert(B <= 64 && "Bit width out of range.");
815 return int64_t(X << (64 - B)) >> (64 - B);
816}
817
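All four SignExtend variants shift left by (width - B) and then arithmetic-shift back, so B must satisfy the documented 0 < B <= width; a shift count that reaches or exceeds the type width is the same class of undefined shift that the analyzer warning at the top of this report describes. Illustrative sketch:

  int64_t V = llvm::SignExtend64<9>(0x1FFu); // low 9 bits are all ones -> -1
  int64_t W = llvm::SignExtend64(0x80u, 8);  // 0x80 treated as an 8-bit value -> -128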
818/// Subtract two unsigned integers, X and Y, of type T and return the absolute
819/// value of the result.
820template <typename T>
821std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) {
822 return X > Y ? (X - Y) : (Y - X);
823}
824
825/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
826/// maximum representable value of T on overflow. ResultOverflowed indicates if
827/// the result is larger than the maximum representable value of type T.
828template <typename T>
829std::enable_if_t<std::is_unsigned<T>::value, T>
830SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
831 bool Dummy;
832 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
833 // Hacker's Delight, p. 29
834 T Z = X + Y;
835 Overflowed = (Z < X || Z < Y);
836 if (Overflowed)
837 return std::numeric_limits<T>::max();
838 else
839 return Z;
840}
841
842/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
843/// maximum representable value of T on overflow. ResultOverflowed indicates if
844/// the result is larger than the maximum representable value of type T.
845template <typename T>
846std::enable_if_t<std::is_unsigned<T>::value, T>
847SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
848 bool Dummy;
849 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
850
851 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
852 // because it fails for uint16_t (where multiplication can have undefined
853 // behavior due to promotion to int), and requires a division in addition
854 // to the multiplication.
855
856 Overflowed = false;
857
858 // Log2(Z) would be either Log2Z or Log2Z + 1.
859 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
860 // will necessarily be less than Log2Max as desired.
861 int Log2Z = Log2_64(X) + Log2_64(Y);
862 const T Max = std::numeric_limits<T>::max();
863 int Log2Max = Log2_64(Max);
864 if (Log2Z < Log2Max) {
865 return X * Y;
866 }
867 if (Log2Z > Log2Max) {
868 Overflowed = true;
869 return Max;
870 }
871
872 // We're going to use the top bit, and maybe overflow one
873 // bit past it. Multiply all but the bottom bit then add
874 // that on at the end.
875 T Z = (X >> 1) * Y;
876 if (Z & ~(Max >> 1)) {
877 Overflowed = true;
878 return Max;
879 }
880 Z <<= 1;
881 if (X & 1)
882 return SaturatingAdd(Z, Y, ResultOverflowed);
883
884 return Z;
885}
886
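The routine above first compares bit widths (Log2Z against Log2Max) and only performs the half-width multiply when the result could straddle the top bit. A small sketch (illustrative only):

  bool Ov = false;
  uint8_t P = llvm::SaturatingMultiply<uint8_t>(16, 20, &Ov); // 320 > 255, so P == 255 and Ov is true
  uint8_t Q = llvm::SaturatingMultiply<uint8_t>(10, 12, &Ov); // fits, so Q == 120 and Ov is false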
887/// Multiply two unsigned integers, X and Y, and add the unsigned integer A to
888/// the product. Clamp the result to the maximum representable value of T on
889/// overflow. ResultOverflowed indicates if the result is larger than the
890/// maximum representable value of type T.
891template <typename T>
892std::enable_if_t<std::is_unsigned<T>::value, T>
893SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
894 bool Dummy;
895 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
896
897 T Product = SaturatingMultiply(X, Y, &Overflowed);
898 if (Overflowed)
899 return Product;
900
901 return SaturatingAdd(A, Product, &Overflowed);
902}
903
904/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
905extern const float huge_valf;
906
907
908/// Add two signed integers, computing the two's complement truncated result,
909/// returning true if overflow occurred.
910template <typename T>
911std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) {
912#if __has_builtin(__builtin_add_overflow)
913 return __builtin_add_overflow(X, Y, &Result);
914#else
915 // Perform the unsigned addition.
916 using U = std::make_unsigned_t<T>;
917 const U UX = static_cast<U>(X);
918 const U UY = static_cast<U>(Y);
919 const U UResult = UX + UY;
920
921 // Convert to signed.
922 Result = static_cast<T>(UResult);
923
924 // Adding two positive numbers should result in a positive number.
925 if (X > 0 && Y > 0)
926 return Result <= 0;
927 // Adding two negatives should result in a negative number.
928 if (X < 0 && Y < 0)
929 return Result >= 0;
930 return false;
931#endif
932}
933
934/// Subtract two signed integers, computing the two's complement truncated
935/// result, returning true if an overflow occurred.
936template <typename T>
937std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) {
938#if __has_builtin(__builtin_sub_overflow)
939 return __builtin_sub_overflow(X, Y, &Result);
940#else
941 // Perform the unsigned subtraction.
942 using U = std::make_unsigned_t<T>;
943 const U UX = static_cast<U>(X);
944 const U UY = static_cast<U>(Y);
945 const U UResult = UX - UY;
946
947 // Convert to signed.
948 Result = static_cast<T>(UResult);
949
950 // Subtracting a positive number from a negative results in a negative number.
951 if (X <= 0 && Y > 0)
952 return Result >= 0;
953 // Subtracting a negative number from a positive results in a positive number.
954 if (X >= 0 && Y < 0)
955 return Result <= 0;
956 return false;
957#endif
958}
959
960/// Multiply two signed integers, computing the two's complement truncated
961/// result, returning true if an overflow occurred.
962template <typename T>
963std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) {
964 // Perform the unsigned multiplication on absolute values.
965 using U = std::make_unsigned_t<T>;
966 const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
967 const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
968 const U UResult = UX * UY;
969
970 // Convert to signed.
971 const bool IsNegative = (X < 0) ^ (Y < 0);
972 Result = IsNegative ? (0 - UResult) : UResult;
973
974 // If any of the args was 0, result is 0 and no overflow occurs.
975 if (UX == 0 || UY == 0)
976 return false;
977
978 // UX and UY are in [1, 2^n], where n is the number of digits.
979 // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
980 // positive) divided by an argument compares to the other.
981 if (IsNegative)
982 return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY;
983 else
984 return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY;
985}
986
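The three checked-arithmetic helpers report overflow and still store the truncated two's complement result; a small sketch (illustrative only):

  int8_t R;
  bool Ov1 = llvm::AddOverflow<int8_t>(100, 100, R); // overflow: 200 does not fit, R holds the wrapped value
  bool Ov2 = llvm::MulOverflow<int8_t>(10, 5, R);    // no overflow: R == 50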
987} // End llvm namespace
988
989#endif

/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/limits

1// The template and inlines for the numeric_limits classes. -*- C++ -*-
2
3// Copyright (C) 1999-2020 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/** @file include/limits
26 * This is a Standard C++ Library header.
27 */
28
29// Note: this is not a conforming implementation.
30// Written by Gabriel Dos Reis <gdr@codesourcery.com>
31
32//
33// ISO 14882:1998
34// 18.2.1
35//
36
37#ifndef _GLIBCXX_NUMERIC_LIMITS
38#define _GLIBCXX_NUMERIC_LIMITS 1
39
40#pragma GCC system_header
41
42#include <bits/c++config.h>
43
44//
45// The numeric_limits<> traits document implementation-defined aspects
46// of fundamental arithmetic data types (integers and floating points).
47// From Standard C++ point of view, there are 14 such types:
48// * integers
49// bool (1)
50// char, signed char, unsigned char, wchar_t (4)
51// short, unsigned short (2)
52// int, unsigned (2)
53// long, unsigned long (2)
54//
55// * floating points
56// float (1)
57// double (1)
58// long double (1)
59//
60// GNU C++ understands (where supported by the host C-library)
61// * integer
62// long long, unsigned long long (2)
63//
64// which brings us to 16 fundamental arithmetic data types in GNU C++.
65//
66//
67// Since a numeric_limits<> is a bit tricky to get right, we rely on
68// an interface composed of macros which should be defined in config/os
69// or config/cpu when they differ from the generic (read arbitrary)
70// definitions given here.
71//
72
73// These values can be overridden in the target configuration file.
74// The default values are appropriate for many 32-bit targets.
75
76// GCC only intrinsically supports modulo integral types. The only remaining
77// integral exceptional values is division by zero. Only targets that do not
78// signal division by zero in some "hard to ignore" way should use false.
79#ifndef __glibcxx_integral_traps
80# define __glibcxx_integral_traps true
81#endif
82
83// float
84//
85
86// Default values. Should be overridden in configuration files if necessary.
87
88#ifndef __glibcxx_float_has_denorm_loss
89# define __glibcxx_float_has_denorm_loss false
90#endif
91#ifndef __glibcxx_float_traps
92# define __glibcxx_float_traps false
93#endif
94#ifndef __glibcxx_float_tinyness_before
95# define __glibcxx_float_tinyness_before false
96#endif
97
98// double
99
100// Default values. Should be overridden in configuration files if necessary.
101
102#ifndef __glibcxx_double_has_denorm_loss
103# define __glibcxx_double_has_denorm_loss false
104#endif
105#ifndef __glibcxx_double_traps
106# define __glibcxx_double_traps false
107#endif
108#ifndef __glibcxx_double_tinyness_before
109# define __glibcxx_double_tinyness_before false
110#endif
111
112// long double
113
114// Default values. Should be overridden in configuration files if necessary.
115
116#ifndef __glibcxx_long_double_has_denorm_loss
117# define __glibcxx_long_double_has_denorm_loss false
118#endif
119#ifndef __glibcxx_long_double_traps
120# define __glibcxx_long_double_traps false
121#endif
122#ifndef __glibcxx_long_double_tinyness_before
123# define __glibcxx_long_double_tinyness_before false
124#endif
125
126// You should not need to define any macros below this point.
127
128#define __glibcxx_signed_b(T,B) ((T)(-1) < 0)
129
130#define __glibcxx_min_b(T,B) \
131 (__glibcxx_signed_b (T,B) ? -__glibcxx_max_b (T,B) - 1 : (T)0)
132
133#define __glibcxx_max_b(T,B) \
134 (__glibcxx_signed_b (T,B) ? \
135 (((((T)1 << (__glibcxx_digits_b (T,B) - 1)) - 1) << 1) + 1) : ~(T)0)
136
137#define __glibcxx_digits_b(T,B) \
138 (B - __glibcxx_signed_b (T,B))
139
140// The fraction 643/2136 approximates log10(2) to 7 significant digits.
141#define __glibcxx_digits10_b(T,B) \
142 (__glibcxx_digits_b (T,B) * 643L / 2136)
143
144#define __glibcxx_signed(T) \
145 __glibcxx_signed_b (T, sizeof(T) * __CHAR_BIT__)
146#define __glibcxx_min(T) \
147 __glibcxx_min_b (T, sizeof(T) * __CHAR_BIT__)
148#define __glibcxx_max(T) \
149 __glibcxx_max_b (T, sizeof(T) * __CHAR_BIT__)
150#define __glibcxx_digits(T) \
151 __glibcxx_digits_b (T, sizeof(T) * __CHAR_BIT__)
152#define __glibcxx_digits10(T) \
153 __glibcxx_digits10_b (T, sizeof(T) * __CHAR_BIT__)
154
155#define __glibcxx_max_digits10(T) \
156 (2 + (T) * 643L / 2136)
157
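A worked instance of the macros above, assuming an 8-bit char and a 32-bit int (illustrative only):

  // __glibcxx_digits(int)   = 32 - 1           = 31
  // __glibcxx_digits10(int) = 31 * 643L / 2136 = 9
  // __glibcxx_max(int)      = ((((int)1 << 30) - 1) << 1) + 1 = 2147483647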
158namespace std _GLIBCXX_VISIBILITY(default)
159{
160_GLIBCXX_BEGIN_NAMESPACE_VERSION
161
162 /**
163 * @brief Describes the rounding style for floating-point types.
164 *
165 * This is used in the std::numeric_limits class.
166 */
167 enum float_round_style
168 {
169 round_indeterminate = -1, /// Intermediate.
170 round_toward_zero = 0, /// To zero.
171 round_to_nearest = 1, /// To the nearest representable value.
172 round_toward_infinity = 2, /// To infinity.
173 round_toward_neg_infinity = 3 /// To negative infinity.
174 };
175
176 /**
177 * @brief Describes the denormalization for floating-point types.
178 *
179 * These values represent the presence or absence of a variable number
180 * of exponent bits. This type is used in the std::numeric_limits class.
181 */
182 enum float_denorm_style
183 {
184 /// Indeterminate at compile time whether denormalized values are allowed.
185 denorm_indeterminate = -1,
186 /// The type does not allow denormalized values.
187 denorm_absent = 0,
188 /// The type allows denormalized values.
189 denorm_present = 1
190 };
191
192 /**
193 * @brief Part of std::numeric_limits.
194 *
195 * The @c static @c const members are usable as integral constant
196 * expressions.
197 *
198 * @note This is a separate class for purposes of efficiency; you
199 * should only access these members as part of an instantiation
200 * of the std::numeric_limits class.
201 */
202 struct __numeric_limits_base
203 {
204 /** This will be true for all fundamental types (which have
205 specializations), and false for everything else. */
206 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = false;
207
208 /** The number of @c radix digits that be represented without change: for
209 integer types, the number of non-sign bits in the mantissa; for
210 floating types, the number of @c radix digits in the mantissa. */
211 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = 0;
212
213 /** The number of base 10 digits that can be represented without change. */
214 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10 = 0;
215
216#if __cplusplus201402L >= 201103L
217 /** The number of base 10 digits required to ensure that values which
218 differ are always differentiated. */
219 static constexpr int max_digits10 = 0;
220#endif
221
222 /** True if the type is signed. */
223 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = false;
224
225 /** True if the type is integer. */
226 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = false;
227
228 /** True if the type uses an exact representation. All integer types are
229 exact, but not all exact types are integer. For example, rational and
230 fixed-exponent representations are exact but not integer. */
231 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = false;
232
233 /** For integer types, specifies the base of the representation. For
234 floating types, specifies the base of the exponent representation. */
235 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 0;
236
237 /** The minimum negative integer such that @c radix raised to the power of
238 (one less than that integer) is a normalized floating point number. */
239 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
240
241 /** The minimum negative integer such that 10 raised to that power is in
242 the range of normalized floating point numbers. */
243 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
244
245 /** The maximum positive integer such that @c radix raised to the power of
246 (one less than that integer) is a representable finite floating point
247 number. */
248 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
249
250 /** The maximum positive integer such that 10 raised to that power is in
251 the range of representable finite floating point numbers. */
252 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
253
254 /** True if the type has a representation for positive infinity. */
255 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
256
257 /** True if the type has a representation for a quiet (non-signaling)
258 Not a Number. */
259 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
260
261 /** True if the type has a representation for a signaling
262 Not a Number. */
263 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
264
265 /** See std::float_denorm_style for more information. */
266 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm = denorm_absent;
267
268 /** True if loss of accuracy is detected as a denormalization loss,
269 rather than as an inexact result. */
270 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
271
272 /** True if-and-only-if the type adheres to the IEC 559 standard, also
273 known as IEEE 754. (Only makes sense for floating point types.) */
274 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
275
276 /** True if the set of values representable by the type is
277 finite. All built-in types are bounded, this member would be
278 false for arbitrary precision types. */
279 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = false;
280
281 /** True if the type is @e modulo. A type is modulo if, for any
282 operation involving +, -, or * on values of that type whose
283 result would fall outside the range [min(),max()], the value
284 returned differs from the true value by an integer multiple of
285 max() - min() + 1. On most machines, this is false for floating
286 types, true for unsigned integers, and true for signed integers.
287 See PR22200 about signed integers. */
288 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = false;
289
290 /** True if trapping is implemented for this type. */
291 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = false;
292
293 /** True if tininess is detected before rounding. (see IEC 559) */
294 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
295
296 /** See std::float_round_style for more information. This is only
297 meaningful for floating types; integer types will all be
298 round_toward_zero. */
299 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style =
300 round_toward_zero;
301 };
302
303 /**
304 * @brief Properties of fundamental types.
305 *
306 * This class allows a program to obtain information about the
307 * representation of a fundamental type on a given platform. For
308 * non-fundamental types, the functions will return 0 and the data
309 * members will all be @c false.
310 */
311 template<typename _Tp>
312 struct numeric_limits : public __numeric_limits_base
313 {
314 /** The minimum finite value, or for floating types with
315 denormalization, the minimum positive normalized value. */
316 static _GLIBCXX_CONSTEXPRconstexpr _Tp
317 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return _Tp(); }
318
319 /** The maximum finite value. */
320 static _GLIBCXX_CONSTEXPRconstexpr _Tp
321 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return _Tp(); }
322
323#if __cplusplus201402L >= 201103L
324 /** A finite value x such that there is no other finite value y
325 * where y < x. */
326 static constexpr _Tp
327 lowest() noexcept { return _Tp(); }
328#endif
329
330 /** The @e machine @e epsilon: the difference between 1 and the least
331 value greater than 1 that is representable. */
332 static _GLIBCXX_CONSTEXPRconstexpr _Tp
333 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return _Tp(); }
334
335 /** The maximum rounding error measurement (see LIA-1). */
336 static _GLIBCXX_CONSTEXPRconstexpr _Tp
337 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return _Tp(); }
338
339 /** The representation of positive infinity, if @c has_infinity. */
340 static _GLIBCXX_CONSTEXPRconstexpr _Tp
341 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return _Tp(); }
342
343 /** The representation of a quiet Not a Number,
344 if @c has_quiet_NaN. */
345 static _GLIBCXX_CONSTEXPRconstexpr _Tp
346 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return _Tp(); }
347
348 /** The representation of a signaling Not a Number, if
349 @c has_signaling_NaN. */
350 static _GLIBCXX_CONSTEXPRconstexpr _Tp
351 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return _Tp(); }
352
353 /** The minimum positive denormalized value. For types where
354 @c has_denorm is false, this is the minimum positive normalized
355 value. */
356 static _GLIBCXX_CONSTEXPRconstexpr _Tp
357 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept { return _Tp(); }
358 };
359
360 // _GLIBCXX_RESOLVE_LIB_DEFECTS
361 // 559. numeric_limits<const T>
362
363 template<typename _Tp>
364 struct numeric_limits<const _Tp>
365 : public numeric_limits<_Tp> { };
366
367 template<typename _Tp>
368 struct numeric_limits<volatile _Tp>
369 : public numeric_limits<_Tp> { };
370
371 template<typename _Tp>
372 struct numeric_limits<const volatile _Tp>
373 : public numeric_limits<_Tp> { };
374
375 // Now there follow 16 explicit specializations. Yes, 16. Make sure
376 // you get the count right. (18 in C++11 mode, with char16_t and char32_t.)
377 // (+1 if char8_t is enabled.)
378
379 // _GLIBCXX_RESOLVE_LIB_DEFECTS
380 // 184. numeric_limits<bool> wording problems
381
382 /// numeric_limits<bool> specialization.
383 template<>
384 struct numeric_limits<bool>
385 {
386 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
387
388 static _GLIBCXX_CONSTEXPRconstexpr bool
389 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return false; }
390
391 static _GLIBCXX_CONSTEXPRconstexpr bool
392 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return true; }
393
394#if __cplusplus201402L >= 201103L
395 static constexpr bool
396 lowest() noexcept { return min(); }
397#endif
398 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = 1;
399 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10 = 0;
400#if __cplusplus201402L >= 201103L
401 static constexpr int max_digits10 = 0;
402#endif
403 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = false;
404 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
405 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
406 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
407
408 static _GLIBCXX_CONSTEXPRconstexpr bool
409 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return false; }
410
411 static _GLIBCXX_CONSTEXPRconstexpr bool
412 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return false; }
413
414 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
415 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
416 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
417 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
418
419 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
420 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
421 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
422 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
423 = denorm_absent;
424 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
425
426 static _GLIBCXX_CONSTEXPRconstexpr bool
427 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return false; }
428
429 static _GLIBCXX_CONSTEXPRconstexpr bool
430 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return false; }
431
432 static _GLIBCXX_CONSTEXPRconstexpr bool
433 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return false; }
434
435 static _GLIBCXX_CONSTEXPRconstexpr bool
436 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept { return false; }
437
438 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
439 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
440 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = false;
441
442 // It is not clear what it means for a boolean type to trap.
443 // This is a DR on the LWG issue list. Here, I use integer
444 // promotion semantics.
445 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
446 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
447 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
448 = round_toward_zero;
449 };
450
451 /// numeric_limits<char> specialization.
452 template<>
453 struct numeric_limits<char>
454 {
455 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
456
457 static _GLIBCXX_CONSTEXPRconstexpr char
458 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return __glibcxx_min(char); }
459
460 static _GLIBCXX_CONSTEXPRconstexpr char
461 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __glibcxx_max(char); }
462
463#if __cplusplus201402L >= 201103L
464 static constexpr char
465 lowest() noexcept { return min(); }
466#endif
467
468 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = __glibcxx_digits (char);
469 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10 = __glibcxx_digits10 (char);
470#if __cplusplus201402L >= 201103L
471 static constexpr int max_digits10 = 0;
472#endif
473 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = __glibcxx_signed (char);
474 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
475 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
476 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
477
478 static _GLIBCXX_CONSTEXPRconstexpr char
479 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
480
481 static _GLIBCXX_CONSTEXPRconstexpr char
482 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
483
484 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
485 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
486 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
487 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
488
489 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
490 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
491 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
492 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
493 = denorm_absent;
494 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
495
496 static _GLIBCXX_CONSTEXPRconstexpr
497 char infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return char(); }
498
499 static _GLIBCXX_CONSTEXPRconstexpr char
500 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return char(); }
501
502 static _GLIBCXX_CONSTEXPRconstexpr char
503 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return char(); }
504
505 static _GLIBCXX_CONSTEXPRconstexpr char
506 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept { return static_cast<char>(0); }
507
508 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
509 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
510 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = !is_signed;
511
512 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
513 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
514 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
515 = round_toward_zero;
516 };
517
518 /// numeric_limits<signed char> specialization.
519 template<>
520 struct numeric_limits<signed char>
521 {
522 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
523
524 static _GLIBCXX_CONSTEXPR signed char
525 min() _GLIBCXX_USE_NOEXCEPT { return -__SCHAR_MAX__ - 1; }
526
527 static _GLIBCXX_CONSTEXPR signed char
528 max() _GLIBCXX_USE_NOEXCEPT { return __SCHAR_MAX__; }
529
530#if __cplusplus201402L >= 201103L
531 static constexpr signed char
532 lowest() noexcept { return min(); }
533#endif
534
535 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = __glibcxx_digits (signed char);
536 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10
537 = __glibcxx_digits10 (signed char);
538#if __cplusplus201402L >= 201103L
539 static constexpr int max_digits10 = 0;
540#endif
541 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = true;
542 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
543 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
544 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
545
546 static _GLIBCXX_CONSTEXPRconstexpr signed char
547 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
548
549 static _GLIBCXX_CONSTEXPRconstexpr signed char
550 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
551
552 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
553 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
554 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
555 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
556
557 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
558 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
559 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
560 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
561 = denorm_absent;
562 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
563
564 static _GLIBCXX_CONSTEXPRconstexpr signed char
565 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return static_cast<signed char>(0); }
566
567 static _GLIBCXX_CONSTEXPRconstexpr signed char
568 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return static_cast<signed char>(0); }
569
570 static _GLIBCXX_CONSTEXPRconstexpr signed char
571 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept
572 { return static_cast<signed char>(0); }
573
574 static _GLIBCXX_CONSTEXPRconstexpr signed char
575 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept
576 { return static_cast<signed char>(0); }
577
578 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
579 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
580 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = false;
581
582 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
583 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
584 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
585 = round_toward_zero;
586 };
587
588 /// numeric_limits<unsigned char> specialization.
589 template<>
590 struct numeric_limits<unsigned char>
591 {
592 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
593
594 static _GLIBCXX_CONSTEXPRconstexpr unsigned char
595 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
596
597 static _GLIBCXX_CONSTEXPR unsigned char
598 max() _GLIBCXX_USE_NOEXCEPT { return __SCHAR_MAX__ * 2U + 1; }
599
600#if __cplusplus201402L >= 201103L
601 static constexpr unsigned char
602 lowest() noexcept { return min(); }
603#endif
604
605 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits
606 = __glibcxx_digits (unsigned char);
607 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10
608 = __glibcxx_digits10 (unsigned char);
609#if __cplusplus201402L >= 201103L
610 static constexpr int max_digits10 = 0;
611#endif
612 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = false;
613 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
614 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
615 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
616
617 static _GLIBCXX_CONSTEXPRconstexpr unsigned char
618 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
619
620 static _GLIBCXX_CONSTEXPRconstexpr unsigned char
621 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
622
623 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
624 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
625 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
626 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
627
628 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
629 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
630 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
631 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
632 = denorm_absent;
633 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
634
635 static _GLIBCXX_CONSTEXPRconstexpr unsigned char
636 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept
637 { return static_cast<unsigned char>(0); }
638
639 static _GLIBCXX_CONSTEXPRconstexpr unsigned char
640 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept
641 { return static_cast<unsigned char>(0); }
642
643 static _GLIBCXX_CONSTEXPRconstexpr unsigned char
644 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept
645 { return static_cast<unsigned char>(0); }
646
647 static _GLIBCXX_CONSTEXPRconstexpr unsigned char
648 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept
649 { return static_cast<unsigned char>(0); }
650
651 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
652 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
653 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = true;
654
655 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
656 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
657 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
658 = round_toward_zero;
659 };
660
661 /// numeric_limits<wchar_t> specialization.
662 template<>
663 struct numeric_limits<wchar_t>
664 {
665 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
666
667 static _GLIBCXX_CONSTEXPRconstexpr wchar_t
668 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return __glibcxx_min (wchar_t); }
669
670 static _GLIBCXX_CONSTEXPRconstexpr wchar_t
671 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __glibcxx_max (wchar_t); }
672
673#if __cplusplus201402L >= 201103L
674 static constexpr wchar_t
675 lowest() noexcept { return min(); }
676#endif
677
678 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = __glibcxx_digits (wchar_t);
679 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10
680 = __glibcxx_digits10 (wchar_t);
681#if __cplusplus201402L >= 201103L
682 static constexpr int max_digits10 = 0;
683#endif
684 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = __glibcxx_signed (wchar_t);
685 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
686 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
687 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
688
689 static _GLIBCXX_CONSTEXPRconstexpr wchar_t
690 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
691
692 static _GLIBCXX_CONSTEXPRconstexpr wchar_t
693 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
694
695 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
696 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
697 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
698 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
699
700 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
701 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
702 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
703 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
704 = denorm_absent;
705 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
706
707 static _GLIBCXX_CONSTEXPRconstexpr wchar_t
708 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return wchar_t(); }
709
710 static _GLIBCXX_CONSTEXPRconstexpr wchar_t
711 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return wchar_t(); }
712
713 static _GLIBCXX_CONSTEXPRconstexpr wchar_t
714 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return wchar_t(); }
715
716 static _GLIBCXX_CONSTEXPRconstexpr wchar_t
717 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept { return wchar_t(); }
718
719 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
720 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
721 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = !is_signed;
722
723 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
724 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
725 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
726 = round_toward_zero;
727 };
728
729#if _GLIBCXX_USE_CHAR8_T
730 /// numeric_limits<char8_t> specialization.
731 template<>
732 struct numeric_limits<char8_t>
733 {
734 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
735
736 static _GLIBCXX_CONSTEXPRconstexpr char8_t
737 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return __glibcxx_min (char8_t); }
738
739 static _GLIBCXX_CONSTEXPRconstexpr char8_t
740 max() _GLIBCXX_USE_NOEXCEPTnoexcept { return __glibcxx_max (char8_t); }
741
742 static _GLIBCXX_CONSTEXPRconstexpr char8_t
743 lowest() _GLIBCXX_USE_NOEXCEPTnoexcept { return min(); }
744
745 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = __glibcxx_digits (char8_t);
746 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10 = __glibcxx_digits10 (char8_t);
747 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_digits10 = 0;
748 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = __glibcxx_signed (char8_t);
749 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
750 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
751 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
752
753 static _GLIBCXX_CONSTEXPRconstexpr char8_t
754 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
755
756 static _GLIBCXX_CONSTEXPRconstexpr char8_t
757 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
758
759 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
760 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
761 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
762 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
763
764 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
765 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
766 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
767 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
768 = denorm_absent;
769 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
770
771 static _GLIBCXX_CONSTEXPRconstexpr char8_t
772 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return char8_t(); }
773
774 static _GLIBCXX_CONSTEXPRconstexpr char8_t
775 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return char8_t(); }
776
777 static _GLIBCXX_CONSTEXPRconstexpr char8_t
778 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return char8_t(); }
779
780 static _GLIBCXX_CONSTEXPRconstexpr char8_t
781 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept { return char8_t(); }
782
783 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
784 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
785 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = !is_signed;
786
787 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
788 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
789 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
790 = round_toward_zero;
791 };
792#endif
793
794#if __cplusplus201402L >= 201103L
795 /// numeric_limits<char16_t> specialization.
796 template<>
797 struct numeric_limits<char16_t>
798 {
799 static constexpr bool is_specialized = true;
800
801 static constexpr char16_t
802 min() noexcept { return __glibcxx_min (char16_t); }
803
804 static constexpr char16_t
805 max() noexcept { return __glibcxx_max (char16_t); }
806
807 static constexpr char16_t
808 lowest() noexcept { return min(); }
809
810 static constexpr int digits = __glibcxx_digits (char16_t);
811 static constexpr int digits10 = __glibcxx_digits10 (char16_t);
812 static constexpr int max_digits10 = 0;
813 static constexpr bool is_signed = __glibcxx_signed (char16_t);
814 static constexpr bool is_integer = true;
815 static constexpr bool is_exact = true;
816 static constexpr int radix = 2;
817
818 static constexpr char16_t
819 epsilon() noexcept { return 0; }
820
821 static constexpr char16_t
822 round_error() noexcept { return 0; }
823
824 static constexpr int min_exponent = 0;
825 static constexpr int min_exponent10 = 0;
826 static constexpr int max_exponent = 0;
827 static constexpr int max_exponent10 = 0;
828
829 static constexpr bool has_infinity = false;
830 static constexpr bool has_quiet_NaN = false;
831 static constexpr bool has_signaling_NaN = false;
832 static constexpr float_denorm_style has_denorm = denorm_absent;
833 static constexpr bool has_denorm_loss = false;
834
835 static constexpr char16_t
836 infinity() noexcept { return char16_t(); }
837
838 static constexpr char16_t
839 quiet_NaN() noexcept { return char16_t(); }
840
841 static constexpr char16_t
842 signaling_NaN() noexcept { return char16_t(); }
843
844 static constexpr char16_t
845 denorm_min() noexcept { return char16_t(); }
846
847 static constexpr bool is_iec559 = false;
848 static constexpr bool is_bounded = true;
849 static constexpr bool is_modulo = !is_signed;
850
851 static constexpr bool traps = __glibcxx_integral_trapstrue;
852 static constexpr bool tinyness_before = false;
853 static constexpr float_round_style round_style = round_toward_zero;
854 };
855
856 /// numeric_limits<char32_t> specialization.
857 template<>
858 struct numeric_limits<char32_t>
859 {
860 static constexpr bool is_specialized = true;
861
862 static constexpr char32_t
863 min() noexcept { return __glibcxx_min (char32_t); }
864
865 static constexpr char32_t
866 max() noexcept { return __glibcxx_max (char32_t); }
867
868 static constexpr char32_t
869 lowest() noexcept { return min(); }
870
871 static constexpr int digits = __glibcxx_digits (char32_t);
872 static constexpr int digits10 = __glibcxx_digits10 (char32_t);
873 static constexpr int max_digits10 = 0;
874 static constexpr bool is_signed = __glibcxx_signed (char32_t);
875 static constexpr bool is_integer = true;
876 static constexpr bool is_exact = true;
877 static constexpr int radix = 2;
878
879 static constexpr char32_t
880 epsilon() noexcept { return 0; }
881
882 static constexpr char32_t
883 round_error() noexcept { return 0; }
884
885 static constexpr int min_exponent = 0;
886 static constexpr int min_exponent10 = 0;
887 static constexpr int max_exponent = 0;
888 static constexpr int max_exponent10 = 0;
889
890 static constexpr bool has_infinity = false;
891 static constexpr bool has_quiet_NaN = false;
892 static constexpr bool has_signaling_NaN = false;
893 static constexpr float_denorm_style has_denorm = denorm_absent;
894 static constexpr bool has_denorm_loss = false;
895
896 static constexpr char32_t
897 infinity() noexcept { return char32_t(); }
898
899 static constexpr char32_t
900 quiet_NaN() noexcept { return char32_t(); }
901
902 static constexpr char32_t
903 signaling_NaN() noexcept { return char32_t(); }
904
905 static constexpr char32_t
906 denorm_min() noexcept { return char32_t(); }
907
908 static constexpr bool is_iec559 = false;
909 static constexpr bool is_bounded = true;
910 static constexpr bool is_modulo = !is_signed;
911
912 static constexpr bool traps = __glibcxx_integral_trapstrue;
913 static constexpr bool tinyness_before = false;
914 static constexpr float_round_style round_style = round_toward_zero;
915 };
916#endif
917
918 /// numeric_limits<short> specialization.
919 template<>
920 struct numeric_limits<short>
921 {
922 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
923
924 static _GLIBCXX_CONSTEXPR short
925 min() _GLIBCXX_USE_NOEXCEPT { return -__SHRT_MAX__ - 1; }
926
927 static _GLIBCXX_CONSTEXPR short
928 max() _GLIBCXX_USE_NOEXCEPT { return __SHRT_MAX__; }
929
930#if __cplusplus201402L >= 201103L
931 static constexpr short
932 lowest() noexcept { return min(); }
933#endif
934
935 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits = __glibcxx_digits (short);
936 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10 = __glibcxx_digits10 (short);
937#if __cplusplus201402L >= 201103L
938 static constexpr int max_digits10 = 0;
939#endif
940 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = true;
941 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
942 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
943 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
944
945 static _GLIBCXX_CONSTEXPRconstexpr short
946 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
947
948 static _GLIBCXX_CONSTEXPRconstexpr short
949 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
950
951 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
952 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
953 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
954 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
955
956 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
957 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
958 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
959 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
960 = denorm_absent;
961 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
962
963 static _GLIBCXX_CONSTEXPRconstexpr short
964 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept { return short(); }
965
966 static _GLIBCXX_CONSTEXPRconstexpr short
967 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return short(); }
968
969 static _GLIBCXX_CONSTEXPRconstexpr short
970 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept { return short(); }
971
972 static _GLIBCXX_CONSTEXPRconstexpr short
973 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept { return short(); }
974
975 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
976 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
977 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = false;
978
979 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
980 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
981 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
982 = round_toward_zero;
983 };
984
985 /// numeric_limits<unsigned short> specialization.
986 template<>
987 struct numeric_limits<unsigned short>
988 {
989 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_specialized = true;
990
991 static _GLIBCXX_CONSTEXPRconstexpr unsigned short
992 min() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
993
994 static _GLIBCXX_CONSTEXPR unsigned short
995 max() _GLIBCXX_USE_NOEXCEPT { return __SHRT_MAX__ * 2U + 1; }
996
997#if __cplusplus201402L >= 201103L
998 static constexpr unsigned short
999 lowest() noexcept { return min(); }
1000#endif
1001
1002 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits
1003 = __glibcxx_digits (unsigned short);
1004 static _GLIBCXX_USE_CONSTEXPRconstexpr int digits10
1005 = __glibcxx_digits10 (unsigned short);
1006#if __cplusplus201402L >= 201103L
1007 static constexpr int max_digits10 = 0;
1008#endif
1009 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_signed = false;
1010 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_integer = true;
1011 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_exact = true;
1012 static _GLIBCXX_USE_CONSTEXPRconstexpr int radix = 2;
1013
1014 static _GLIBCXX_CONSTEXPRconstexpr unsigned short
1015 epsilon() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1016
1017 static _GLIBCXX_CONSTEXPRconstexpr unsigned short
1018 round_error() _GLIBCXX_USE_NOEXCEPTnoexcept { return 0; }
1019
1020 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent = 0;
1021 static _GLIBCXX_USE_CONSTEXPRconstexpr int min_exponent10 = 0;
1022 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent = 0;
1023 static _GLIBCXX_USE_CONSTEXPRconstexpr int max_exponent10 = 0;
1024
1025 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_infinity = false;
1026 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_quiet_NaN = false;
1027 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_signaling_NaN = false;
1028 static _GLIBCXX_USE_CONSTEXPRconstexpr float_denorm_style has_denorm
1029 = denorm_absent;
1030 static _GLIBCXX_USE_CONSTEXPRconstexpr bool has_denorm_loss = false;
1031
1032 static _GLIBCXX_CONSTEXPRconstexpr unsigned short
1033 infinity() _GLIBCXX_USE_NOEXCEPTnoexcept
1034 { return static_cast<unsigned short>(0); }
1035
1036 static _GLIBCXX_CONSTEXPRconstexpr unsigned short
1037 quiet_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept
1038 { return static_cast<unsigned short>(0); }
1039
1040 static _GLIBCXX_CONSTEXPRconstexpr unsigned short
1041 signaling_NaN() _GLIBCXX_USE_NOEXCEPTnoexcept
1042 { return static_cast<unsigned short>(0); }
1043
1044 static _GLIBCXX_CONSTEXPRconstexpr unsigned short
1045 denorm_min() _GLIBCXX_USE_NOEXCEPTnoexcept
1046 { return static_cast<unsigned short>(0); }
1047
1048 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_iec559 = false;
1049 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_bounded = true;
1050 static _GLIBCXX_USE_CONSTEXPRconstexpr bool is_modulo = true;
1051
1052 static _GLIBCXX_USE_CONSTEXPRconstexpr bool traps = __glibcxx_integral_trapstrue;
1053 static _GLIBCXX_USE_CONSTEXPRconstexpr bool tinyness_before = false;
1054 static _GLIBCXX_USE_CONSTEXPRconstexpr float_round_style round_style
1055 = round_toward_zero;
1056 };
1057
1058 /// numeric_limits<int> specialization.
1059 template<>
1060 struct numeric_limits<int>
1061 {
1062      static _GLIBCXX_USE_CONSTEXPR bool is_specialized = true;
1063
1064      static _GLIBCXX_CONSTEXPR int
1065      min() _GLIBCXX_USE_NOEXCEPT { return -__INT_MAX__ - 1; }
1066
1067      static _GLIBCXX_CONSTEXPR int
1068      max() _GLIBCXX_USE_NOEXCEPT { return __INT_MAX__; }
1069
1070#if __cplusplus >= 201103L
1071      static constexpr int
1072      lowest() noexcept { return min(); }
1073#endif
1074
1075      static _GLIBCXX_USE_CONSTEXPR int digits = __glibcxx_digits (int);
1076      static _GLIBCXX_USE_CONSTEXPR int digits10 = __glibcxx_digits10 (int);
1077#if __cplusplus >= 201103L
1078      static constexpr int max_digits10 = 0;
1079#endif
1080      static _GLIBCXX_USE_CONSTEXPR bool is_signed = true;
1081      static _GLIBCXX_USE_CONSTEXPR bool is_integer = true;
1082      static _GLIBCXX_USE_CONSTEXPR bool is_exact = true;
1083      static _GLIBCXX_USE_CONSTEXPR int radix = 2;
1084
1085      static _GLIBCXX_CONSTEXPR int
1086      epsilon() _GLIBCXX_USE_NOEXCEPT { return 0; }
1087
1088      static _GLIBCXX_CONSTEXPR int
1089      round_error() _GLIBCXX_USE_NOEXCEPT { return 0; }
1090
1091      static _GLIBCXX_USE_CONSTEXPR int min_exponent = 0;
1092      static _GLIBCXX_USE_CONSTEXPR int min_exponent10 = 0;
1093      static _GLIBCXX_USE_CONSTEXPR int max_exponent = 0;
1094      static _GLIBCXX_USE_CONSTEXPR int max_exponent10 = 0;
1095
1096      static _GLIBCXX_USE_CONSTEXPR bool has_infinity = false;
1097      static _GLIBCXX_USE_CONSTEXPR bool has_quiet_NaN = false;
1098      static _GLIBCXX_USE_CONSTEXPR bool has_signaling_NaN = false;
1099      static _GLIBCXX_USE_CONSTEXPR float_denorm_style has_denorm
1100      = denorm_absent;
1101      static _GLIBCXX_USE_CONSTEXPR bool has_denorm_loss = false;
1102
1103      static _GLIBCXX_CONSTEXPR int
1104      infinity() _GLIBCXX_USE_NOEXCEPT { return static_cast<int>(0); }
1105
1106      static _GLIBCXX_CONSTEXPR int
1107      quiet_NaN() _GLIBCXX_USE_NOEXCEPT { return static_cast<int>(0); }
1108
1109      static _GLIBCXX_CONSTEXPR int
1110      signaling_NaN() _GLIBCXX_USE_NOEXCEPT { return static_cast<int>(0); }
1111
1112      static _GLIBCXX_CONSTEXPR int
1113      denorm_min() _GLIBCXX_USE_NOEXCEPT { return static_cast<int>(0); }
1114
1115      static _GLIBCXX_USE_CONSTEXPR bool is_iec559 = false;
1116      static _GLIBCXX_USE_CONSTEXPR bool is_bounded = true;
1117      static _GLIBCXX_USE_CONSTEXPR bool is_modulo = false;
1118
1119      static _GLIBCXX_USE_CONSTEXPR bool traps = __glibcxx_integral_traps;
1120      static _GLIBCXX_USE_CONSTEXPR bool tinyness_before = false;
1121      static _GLIBCXX_USE_CONSTEXPR float_round_style round_style
1122 = round_toward_zero;
1123 };
1124
1125 /// numeric_limits<unsigned int> specialization.
1126 template<>
1127 struct numeric_limits<unsigned int>
1128 {
1129      static _GLIBCXX_USE_CONSTEXPR bool is_specialized = true;
1130
1131      static _GLIBCXX_CONSTEXPR unsigned int
1132      min() _GLIBCXX_USE_NOEXCEPT { return 0; }
1133
1134      static _GLIBCXX_CONSTEXPR unsigned int
1135      max() _GLIBCXX_USE_NOEXCEPT { return __INT_MAX__ * 2U + 1; }
1136
1137#if __cplusplus >= 201103L
1138      static constexpr unsigned int
1139      lowest() noexcept { return min(); }
1140#endif
1141
1142      static _GLIBCXX_USE_CONSTEXPR int digits
1143      = __glibcxx_digits (unsigned int);
1144      static _GLIBCXX_USE_CONSTEXPR int digits10
1145      = __glibcxx_digits10 (unsigned int);
1146#if __cplusplus >= 201103L
1147      static constexpr int max_digits10 = 0;
1148#endif
1149      static _GLIBCXX_USE_CONSTEXPR bool is_signed = false;
1150      static _GLIBCXX_USE_CONSTEXPR bool is_integer = true;
1151      static _GLIBCXX_USE_CONSTEXPR bool is_exact = true;
1152      static _GLIBCXX_USE_CONSTEXPR int radix = 2;
1153
1154      static _GLIBCXX_CONSTEXPR unsigned int
1155      epsilon() _GLIBCXX_USE_NOEXCEPT { return 0; }
1156
1157      static _GLIBCXX_CONSTEXPR unsigned int
1158      round_error() _GLIBCXX_USE_NOEXCEPT { return 0; }
1159
1160      static _GLIBCXX_USE_CONSTEXPR int min_exponent = 0;
1161      static _GLIBCXX_USE_CONSTEXPR int min_exponent10 = 0;
1162      static _GLIBCXX_USE_CONSTEXPR int max_exponent = 0;
1163      static _GLIBCXX_USE_CONSTEXPR int max_exponent10 = 0;
1164
1165      static _GLIBCXX_USE_CONSTEXPR bool has_infinity = false;
1166      static _GLIBCXX_USE_CONSTEXPR bool has_quiet_NaN = false;
1167      static _GLIBCXX_USE_CONSTEXPR bool has_signaling_NaN = false;
1168      static _GLIBCXX_USE_CONSTEXPR float_denorm_style has_denorm
1169      = denorm_absent;
1170      static _GLIBCXX_USE_CONSTEXPR bool has_denorm_loss = false;
1171
1172      static _GLIBCXX_CONSTEXPR unsigned int
1173      infinity() _GLIBCXX_USE_NOEXCEPT { return static_cast<unsigned int>(0); }
1174
1175      static _GLIBCXX_CONSTEXPR unsigned int
1176      quiet_NaN() _GLIBCXX_USE_NOEXCEPT
1177      { return static_cast<unsigned int>(0); }
1178
1179      static _GLIBCXX_CONSTEXPR unsigned int
1180      signaling_NaN() _GLIBCXX_USE_NOEXCEPT
1181      { return static_cast<unsigned int>(0); }
1182
1183      static _GLIBCXX_CONSTEXPR unsigned int
1184      denorm_min() _GLIBCXX_USE_NOEXCEPT
1185      { return static_cast<unsigned int>(0); }
1186
1187      static _GLIBCXX_USE_CONSTEXPR bool is_iec559 = false;
1188      static _GLIBCXX_USE_CONSTEXPR bool is_bounded = true;
1189      static _GLIBCXX_USE_CONSTEXPR bool is_modulo = true;
1190
1191      static _GLIBCXX_USE_CONSTEXPR bool traps = __glibcxx_integral_traps;
1192      static _GLIBCXX_USE_CONSTEXPR bool tinyness_before = false;
1193      static _GLIBCXX_USE_CONSTEXPR float_round_style round_style
1194 = round_toward_zero;
1195 };
1196
1197 /// numeric_limits<long> specialization.
1198 template<>
1199 struct numeric_limits<long>
1200 {
1201      static _GLIBCXX_USE_CONSTEXPR bool is_specialized = true;
1202
1203      static _GLIBCXX_CONSTEXPR long
1204      min() _GLIBCXX_USE_NOEXCEPT { return -__LONG_MAX__ - 1; }
1205
1206      static _GLIBCXX_CONSTEXPR long
1207      max() _GLIBCXX_USE_NOEXCEPT { return __LONG_MAX__; }
1208
1209#if __cplusplus >= 201103L
1210      static constexpr long
1211      lowest() noexcept { return min(); }
1212#endif
1213
1214      static _GLIBCXX_USE_CONSTEXPR int digits = __glibcxx_digits (long);
1215      static _GLIBCXX_USE_CONSTEXPR int digits10 = __glibcxx_digits10 (long);
1216#if __cplusplus >= 201103L
1217      static constexpr int max_digits10 = 0;
1218#endif
1219      static _GLIBCXX_USE_CONSTEXPR bool is_signed = true;
1220      static _GLIBCXX_USE_CONSTEXPR bool is_integer = true;
1221      static _GLIBCXX_USE_CONSTEXPR bool is_exact = true;
1222      static _GLIBCXX_USE_CONSTEXPR int radix = 2;
1223
1224      static _GLIBCXX_CONSTEXPR long
1225      epsilon() _GLIBCXX_USE_NOEXCEPT { return 0; }
1226
1227      static _GLIBCXX_CONSTEXPR long
1228      round_error() _GLIBCXX_USE_NOEXCEPT { return 0; }
1229
1230      static _GLIBCXX_USE_CONSTEXPR int min_exponent = 0;
1231      static _GLIBCXX_USE_CONSTEXPR int min_exponent10 = 0;
1232      static _GLIBCXX_USE_CONSTEXPR int max_exponent = 0;
1233      static _GLIBCXX_USE_CONSTEXPR int max_exponent10 = 0;
1234
1235      static _GLIBCXX_USE_CONSTEXPR bool has_infinity = false;
1236      static _GLIBCXX_USE_CONSTEXPR bool has_quiet_NaN = false;
1237      static _GLIBCXX_USE_CONSTEXPR bool has_signaling_NaN = false;
1238      static _GLIBCXX_USE_CONSTEXPR float_denorm_style has_denorm
1239      = denorm_absent;
1240      static _GLIBCXX_USE_CONSTEXPR bool has_denorm_loss = false;
1241
1242      static _GLIBCXX_CONSTEXPR long
1243      infinity() _GLIBCXX_USE_NOEXCEPT { return static_cast<long>(0); }
1244
1245      static _GLIBCXX_CONSTEXPR long
1246      quiet_NaN() _GLIBCXX_USE_NOEXCEPT { return static_cast<long>(0); }
1247
1248      static _GLIBCXX_CONSTEXPR long
1249      signaling_NaN() _GLIBCXX_USE_NOEXCEPT { return static_cast<long>(0); }
1250
1251      static _GLIBCXX_CONSTEXPR long
1252      denorm_min() _GLIBCXX_USE_NOEXCEPT { return static_cast<long>(0); }
1253
1254      static _GLIBCXX_USE_CONSTEXPR bool is_iec559 = false;
1255      static _GLIBCXX_USE_CONSTEXPR bool is_bounded = true;
1256      static _GLIBCXX_USE_CONSTEXPR bool is_modulo = false;
1257
1258      static _GLIBCXX_USE_CONSTEXPR bool traps = __glibcxx_integral_traps;
1259      static _GLIBCXX_USE_CONSTEXPR bool tinyness_before = false;
1260      static _GLIBCXX_USE_CONSTEXPR float_round_style round_style
1261 = round_toward_zero;
1262 };
1263
1264 /// numeric_limits<unsigned long> specialization.
1265 template<>
1266 struct numeric_limits<unsigned long>
1267 {
1268      static _GLIBCXX_USE_CONSTEXPR bool is_specialized = true;
1269
1270      static _GLIBCXX_CONSTEXPR unsigned long
1271      min() _GLIBCXX_USE_NOEXCEPT { return 0; }
1272
1273      static _GLIBCXX_CONSTEXPR unsigned long
1274      max() _GLIBCXX_USE_NOEXCEPT { return __LONG_MAX__ * 2UL + 1; }
 17. Returning the value 18446744073709551615
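(Aside: the path event above records that numeric_limits&lt;unsigned long&gt;::max() evaluates to 18446744073709551615, i.e. 2^64 - 1 on this 64-bit target, and that value later reaches the flagged left shift as a shift count. The following is a minimal, self-contained sketch of the same failure mode, not the LLVM code itself; the function name shiftedBit and its guard are illustrative only.)

    #include <cstdint>
    #include <limits>

    // Sketch only: shifting a uint64_t by a count >= 64 is undefined behaviour.
    // If 'count' can be as large as std::numeric_limits<uint64_t>::max(), the
    // shift has to be guarded; an unguarded 'value << count' is exactly what
    // the checker reports.
    uint64_t shiftedBit(uint64_t count) {
      if (count >= 64)   // without this guard, 'uint64_t(1) << count' is UB
        return 0;
      return uint64_t(1) << count;
    }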
1275
1276#if __cplusplus >= 201103L
1277      static constexpr unsigned long
1278      lowest() noexcept { return min(); }
1279#endif
1280
1281      static _GLIBCXX_USE_CONSTEXPR int digits
1282      = __glibcxx_digits (unsigned long);
1283      static _GLIBCXX_USE_CONSTEXPR int digits10
1284      = __glibcxx_digits10 (unsigned long);
1285#if __cplusplus >= 201103L
1286      static constexpr int max_digits10 = 0;
1287#endif
1288      static _GLIBCXX_USE_CONSTEXPR bool is_signed = false;
1289      static _GLIBCXX_USE_CONSTEXPR bool is_integer = true;
1290      static _GLIBCXX_USE_CONSTEXPR bool is_exact = true;
1291      static _GLIBCXX_USE_CONSTEXPR int radix = 2;
1292
1293      static _GLIBCXX_CONSTEXPR unsigned long
1294      epsilon() _GLIBCXX_USE_NOEXCEPT { return 0; }
1295
1296      static _GLIBCXX_CONSTEXPR unsigned long
1297      round_error() _GLIBCXX_USE_NOEXCEPT { return 0; }
1298
1299      static _GLIBCXX_USE_CONSTEXPR int min_exponent = 0;
1300      static _GLIBCXX_USE_CONSTEXPR int min_exponent10 = 0;
1301      static _GLIBCXX_USE_CONSTEXPR int max_exponent = 0;
1302      static _GLIBCXX_USE_CONSTEXPR int max_exponent10 = 0;
1303
1304      static _GLIBCXX_USE_CONSTEXPR bool has_infinity = false;
1305      static _GLIBCXX_USE_CONSTEXPR bool has_quiet_NaN = false;
1306      static _GLIBCXX_USE_CONSTEXPR bool has_signaling_NaN = false;
1307      static _GLIBCXX_USE_CONSTEXPR float_denorm_style has_denorm
1308      = denorm_absent;
1309      static _GLIBCXX_USE_CONSTEXPR bool has_denorm_loss = false;
1310
1311      static _GLIBCXX_CONSTEXPR unsigned long
1312      infinity() _GLIBCXX_USE_NOEXCEPT
1313      { return static_cast<unsigned long>(0); }
1314
1315      static _GLIBCXX_CONSTEXPR unsigned long
1316      quiet_NaN() _GLIBCXX_USE_NOEXCEPT
1317      { return static_cast<unsigned long>(0); }
1318
1319      static _GLIBCXX_CONSTEXPR unsigned long
1320      signaling_NaN() _GLIBCXX_USE_NOEXCEPT
1321      { return static_cast<unsigned long>(0); }
1322
1323      static _GLIBCXX_CONSTEXPR unsigned long
1324      denorm_min() _GLIBCXX_USE_NOEXCEPT
1325      { return static_cast<unsigned long>(0); }
1326
1327      static _GLIBCXX_USE_CONSTEXPR bool is_iec559 = false;
1328      static _GLIBCXX_USE_CONSTEXPR bool is_bounded = true;
1329      static _GLIBCXX_USE_CONSTEXPR bool is_modulo = true;
1330
1331      static _GLIBCXX_USE_CONSTEXPR bool traps = __glibcxx_integral_traps;
1332      static _GLIBCXX_USE_CONSTEXPR bool tinyness_before = false;
1333      static _GLIBCXX_USE_CONSTEXPR float_round_style round_style
1334 = round_toward_zero;
1335 };
1336
1337 /// numeric_limits<long long> specialization.
1338 template<>
1339 struct numeric_limits<long long>
1340 {
1341      static _GLIBCXX_USE_CONSTEXPR bool is_specialized = true;
1342
1343      static _GLIBCXX_CONSTEXPR long long
1344      min() _GLIBCXX_USE_NOEXCEPT { return -__LONG_LONG_MAX__ - 1; }
1345
1346      static _GLIBCXX_CONSTEXPR long long
1347      max() _GLIBCXX_USE_NOEXCEPT { return __LONG_LONG_MAX__; }
1348
1349#if __cplusplus >= 201103L
1350      static constexpr long long
1351      lowest() noexcept { return min(); }
1352#endif
1353
1354      static _GLIBCXX_USE_CONSTEXPR int digits
1355      = __glibcxx_digits (long long);
1356      static _GLIBCXX_USE_CONSTEXPR int digits10
1357      = __glibcxx_digits10 (long long);
1358#if __cplusplus >= 201103L
1359      static constexpr int max_digits10 = 0;
1360#endif
1361      static _GLIBCXX_USE_CONSTEXPR bool is_signed = true;
1362      static _GLIBCXX_USE_CONSTEXPR bool is_integer = true;
1363      static _GLIBCXX_USE_CONSTEXPR bool is_exact = true;
1364      static _GLIBCXX_USE_CONSTEXPR int radix = 2;
1365
1366      static _GLIBCXX_CONSTEXPR long long
1367      epsilon() _GLIBCXX_USE_NOEXCEPT { return 0; }
1368
1369      static _GLIBCXX_CONSTEXPR long long
1370      round_error() _GLIBCXX_USE_NOEXCEPT { return 0; }
1371
1372      static _GLIBCXX_USE_CONSTEXPR int min_exponent = 0;
1373      static _GLIBCXX_USE_CONSTEXPR int min_exponent10 = 0;
1374      static _GLIBCXX_USE_CONSTEXPR int max_exponent = 0;
1375      static _GLIBCXX_USE_CONSTEXPR int max_exponent10 = 0;
1376
1377      static _GLIBCXX_USE_CONSTEXPR bool has_infinity = false;
1378      static _GLIBCXX_USE_CONSTEXPR bool has_quiet_NaN = false;
1379      static _GLIBCXX_USE_CONSTEXPR bool has_signaling_NaN = false;
1380      static _GLIBCXX_USE_CONSTEXPR float_denorm_style has_denorm
1381      = denorm_absent;
1382      static _GLIBCXX_USE_CONSTEXPR bool has_denorm_loss = false;
1383
1384      static _GLIBCXX_CONSTEXPR long long
1385      infinity() _GLIBCXX_USE_NOEXCEPT { return static_cast<long long>(0); }
1386
1387      static _GLIBCXX_CONSTEXPR long long
1388      quiet_NaN() _GLIBCXX_USE_NOEXCEPT { return static_cast<long long>(0); }
1389
1390      static _GLIBCXX_CONSTEXPR long long
1391      signaling_NaN() _GLIBCXX_USE_NOEXCEPT
1392      { return static_cast<long long>(0); }
1393
1394      static _GLIBCXX_CONSTEXPR long long
1395      denorm_min() _GLIBCXX_USE_NOEXCEPT { return static_cast<long long>(0); }
1396
1397      static _GLIBCXX_USE_CONSTEXPR bool is_iec559 = false;
1398      static _GLIBCXX_USE_CONSTEXPR bool is_bounded = true;
1399      static _GLIBCXX_USE_CONSTEXPR bool is_modulo = false;
1400
1401      static _GLIBCXX_USE_CONSTEXPR bool traps = __glibcxx_integral_traps;
1402      static _GLIBCXX_USE_CONSTEXPR bool tinyness_before = false;
1403      static _GLIBCXX_USE_CONSTEXPR float_round_style round_style
1404 = round_toward_zero;
1405 };
1406
1407 /// numeric_limits<unsigned long long> specialization.
1408 template<>
1409 struct numeric_limits<unsigned long long>
1410 {
1411      static _GLIBCXX_USE_CONSTEXPR bool is_specialized = true;
1412
1413      static _GLIBCXX_CONSTEXPR unsigned long long
1414      min() _GLIBCXX_USE_NOEXCEPT { return 0; }
1415
1416      static _GLIBCXX_CONSTEXPR unsigned long long
1417      max() _GLIBCXX_USE_NOEXCEPT { return __LONG_LONG_MAX__ * 2ULL + 1; }
1418
1419#if __cplusplus >= 201103L
1420      static constexpr unsigned long long
1421      lowest() noexcept { return min(); }
1422#endif
1423
1424      static _GLIBCXX_USE_CONSTEXPR int digits
1425      = __glibcxx_digits (unsigned long long);
1426      static _GLIBCXX_USE_CONSTEXPR int digits10
1427      = __glibcxx_digits10 (unsigned long long);
1428#if __cplusplus >= 201103L
1429      static constexpr int max_digits10 = 0;
1430#endif
1431      static _GLIBCXX_USE_CONSTEXPR bool is_signed = false;
1432      static _GLIBCXX_USE_CONSTEXPR bool is_integer = true;
1433      static _GLIBCXX_USE_CONSTEXPR bool is_exact = true;
1434      static _GLIBCXX_USE_CONSTEXPR int radix = 2;
1435
1436      static _GLIBCXX_CONSTEXPR unsigned long long
1437      epsilon() _GLIBCXX_USE_NOEXCEPT { return 0; }
1438
1439      static _GLIBCXX_CONSTEXPR unsigned long long
1440      round_error() _GLIBCXX_USE_NOEXCEPT { return 0; }
1441
1442      static _GLIBCXX_USE_CONSTEXPR int min_exponent = 0;
1443      static _GLIBCXX_USE_CONSTEXPR int min_exponent10 = 0;
1444      static _GLIBCXX_USE_CONSTEXPR int max_exponent = 0;
1445      static _GLIBCXX_USE_CONSTEXPR int max_exponent10 = 0;
1446
1447      static _GLIBCXX_USE_CONSTEXPR bool has_infinity = false;
1448      static _GLIBCXX_USE_CONSTEXPR bool has_quiet_NaN = false;
1449      static _GLIBCXX_USE_CONSTEXPR bool has_signaling_NaN = false;
1450      static _GLIBCXX_USE_CONSTEXPR float_denorm_style has_denorm
1451      = denorm_absent;
1452      static _GLIBCXX_USE_CONSTEXPR bool has_denorm_loss = false;
1453
1454      static _GLIBCXX_CONSTEXPR unsigned long long
1455      infinity() _GLIBCXX_USE_NOEXCEPT
1456      { return static_cast<unsigned long long>(0); }
1457
1458      static _GLIBCXX_CONSTEXPR unsigned long long
1459      quiet_NaN() _GLIBCXX_USE_NOEXCEPT
1460      { return static_cast<unsigned long long>(0); }
1461
1462      static _GLIBCXX_CONSTEXPR unsigned long long
1463      signaling_NaN() _GLIBCXX_USE_NOEXCEPT
1464      { return static_cast<unsigned long long>(0); }
1465
1466      static _GLIBCXX_CONSTEXPR unsigned long long
1467      denorm_min() _GLIBCXX_USE_NOEXCEPT
1468      { return static_cast<unsigned long long>(0); }
1469
1470      static _GLIBCXX_USE_CONSTEXPR bool is_iec559 = false;
1471      static _GLIBCXX_USE_CONSTEXPR bool is_bounded = true;
1472      static _GLIBCXX_USE_CONSTEXPR bool is_modulo = true;
1473
1474      static _GLIBCXX_USE_CONSTEXPR bool traps = __glibcxx_integral_traps;
1475      static _GLIBCXX_USE_CONSTEXPR bool tinyness_before = false;
1476      static _GLIBCXX_USE_CONSTEXPR float_round_style round_style
1477 = round_toward_zero;
1478 };
1479
1480#define __INT_N(TYPE, BITSIZE, EXT, UEXT) \
1481 template<> \
1482 struct numeric_limits<TYPE> \
1483 { \
1484      static _GLIBCXX_USE_CONSTEXPR bool is_specialized = true;		\
1485									\
1486      static _GLIBCXX_CONSTEXPR TYPE					\
1487      min() _GLIBCXX_USE_NOEXCEPT { return __glibcxx_min_b (TYPE, BITSIZE); } \
1488									\
1489      static _GLIBCXX_CONSTEXPR TYPE					\
1490      max() _GLIBCXX_USE_NOEXCEPT { return __glibcxx_max_b (TYPE, BITSIZE); } \
1491									\
1492      static _GLIBCXX_USE_CONSTEXPR int digits				\
1493      = BITSIZE - 1;							\
1494      static _GLIBCXX_USE_CONSTEXPR int digits10			\
1495      = (BITSIZE - 1) * 643L / 2136;					\
1496									\
1497      static _GLIBCXX_USE_CONSTEXPR bool is_signed = true;		\
1498      static _GLIBCXX_USE_CONSTEXPR bool is_integer = true;		\
1499      static _GLIBCXX_USE_CONSTEXPR bool is_exact = true;		\
1500      static _GLIBCXX_USE_CONSTEXPR int radix = 2;			\
1501									\
1502      static _GLIBCXX_CONSTEXPR TYPE					\
1503      epsilon() _GLIBCXX_USE_NOEXCEPT { return 0; }			\
1504									\
1505      static _GLIBCXX_CONSTEXPR TYPE					\
1506      round_error() _GLIBCXX_USE_NOEXCEPT { return 0; }		\
1507									\
1508      EXT								\
1509									\
1510      static _GLIBCXX_USE_CONSTEXPR int min_exponent = 0;		\
1511      static _GLIBCXX_USE_CONSTEXPR int min_exponent10 = 0;		\
1512      static _GLIBCXX_USE_CONSTEXPR int max_exponent = 0;		\
1513      static _GLIBCXX_USE_CONSTEXPR int max_exponent10 = 0;		\
1514									\
1515      static _GLIBCXX_USE_CONSTEXPR bool has_infinity = false;		\
1516      static _GLIBCXX_USE_CONSTEXPR bool has_quiet_NaN = false;	\
1517      static _GLIBCXX_USE_CONSTEXPR bool has_signaling_NaN = false;	\
1518      static _GLIBCXX_USE_CONSTEXPR float_denorm_style has_denorm	\
1519      = denorm_absent;							\
1520      static _GLIBCXX_USE_CONSTEXPR bool has_denorm_loss = false;	\
1521									\
1522      static _GLIBCXX_CONSTEXPR TYPE					\
1523      infinity() _GLIBCXX_USE_NOEXCEPT					\
1524      { return static_cast<TYPE>(0); }					\
1525									\
1526      static _GLIBCXX_CONSTEXPR TYPE					\
1527      quiet_NaN() _GLIBCXX_USE_NOEXCEPT				\
1528      { return static_cast<TYPE>(0); }					\
1529									\
1530      static _GLIBCXX_CONSTEXPR TYPE					\
1531      signaling_NaN() _GLIBCXX_USE_NOEXCEPT				\
1532      { return static_cast<TYPE>(0); }					\
1533									\
1534      static _GLIBCXX_CONSTEXPR TYPE					\
1535      denorm_min() _GLIBCXX_USE_NOEXCEPT				\
1536      { return static_cast<TYPE>(0); }					\
1537									\
1538      static _GLIBCXX_USE_CONSTEXPR bool is_iec559 = false;		\
1539      static _GLIBCXX_USE_CONSTEXPR bool is_bounded = true;		\
1540      static _GLIBCXX_USE_CONSTEXPR bool is_modulo = false;		\
1541									\
1542      static _GLIBCXX_USE_CONSTEXPR bool traps				\
1543      = __glibcxx_integral_traps;					\
1544      static _GLIBCXX_USE_CONSTEXPR bool tinyness_before = false;	\
1545      static _GLIBCXX_USE_CONSTEXPR float_round_style round_style	\
1546      = round_toward_zero;						\
1547    };									\
1548									\
1549  template<>								\
1550    struct numeric_limits<unsigned TYPE>				\
1551    {									\
1552      static _GLIBCXX_USE_CONSTEXPR bool is_specialized = true;	\
1553									\
1554      static _GLIBCXX_CONSTEXPR unsigned TYPE				\
1555      min() _GLIBCXX_USE_NOEXCEPT { return 0; }			\
1556									\
1557      static _GLIBCXX_CONSTEXPR unsigned TYPE				\
1558      max() _GLIBCXX_USE_NOEXCEPT					\
1559      { return __glibcxx_max_b (unsigned TYPE, BITSIZE); }		\
1560									\
1561      UEXT								\
1562									\
1563      static _GLIBCXX_USE_CONSTEXPR int digits				\
1564      = BITSIZE;							\
1565      static _GLIBCXX_USE_CONSTEXPR int digits10			\
1566      = BITSIZE * 643L / 2136;						\
1567      static _GLIBCXX_USE_CONSTEXPR bool is_signed = false;		\
1568      static _GLIBCXX_USE_CONSTEXPR bool is_integer = true;		\
1569      static _GLIBCXX_USE_CONSTEXPR bool is_exact = true;		\
1570      static _GLIBCXX_USE_CONSTEXPR int radix = 2;			\
1571									\
1572      static _GLIBCXX_CONSTEXPR unsigned TYPE				\
1573      epsilon() _GLIBCXX_USE_NOEXCEPT { return 0; }			\
1574									\
1575      static _GLIBCXX_CONSTEXPR unsigned TYPE				\
1576      round_error() _GLIBCXX_USE_NOEXCEPT { return 0; }		\
1577									\
1578      static _GLIBCXX_USE_CONSTEXPR int min_exponent = 0;		\
1579      static _GLIBCXX_USE_CONSTEXPR int min_exponent10 = 0;		\
1580      static _GLIBCXX_USE_CONSTEXPR int max_exponent = 0;		\
1581      static _GLIBCXX_USE_CONSTEXPR int max_exponent10 = 0;		\
1582									\
1583      static _GLIBCXX_USE_CONSTEXPR bool has_infinity = false;		\
1584      static _GLIBCXX_USE_CONSTEXPR bool has_quiet_NaN = false;	\
1585      static _GLIBCXX_USE_CONSTEXPR bool has_signaling_NaN = false;	\
1586      static _GLIBCXX_USE_CONSTEXPR float_denorm_style has_denorm	\
1587      = denorm_absent;							\
1588      static _GLIBCXX_USE_CONSTEXPR bool has_denorm_loss = false;	\
1589									\
1590      static _GLIBCXX_CONSTEXPR unsigned TYPE				\
1591      infinity() _GLIBCXX_USE_NOEXCEPT					\
1592      { return static_cast<unsigned TYPE>(0); }			\
1593									\
1594      static _GLIBCXX_CONSTEXPR unsigned TYPE				\
1595      quiet_NaN() _GLIBCXX_USE_NOEXCEPT				\
1596      { return static_cast<unsigned TYPE>(0); }			\
1597									\
1598      static _GLIBCXX_CONSTEXPR unsigned TYPE				\
1599      signaling_NaN() _GLIBCXX_USE_NOEXCEPT				\
1600      { return static_cast<unsigned TYPE>(0); }			\
1601									\
1602      static _GLIBCXX_CONSTEXPR unsigned TYPE				\
1603      denorm_min() _GLIBCXX_USE_NOEXCEPT				\
1604      { return static_cast<unsigned TYPE>(0); }			\
1605									\
1606      static _GLIBCXX_USE_CONSTEXPR bool is_iec559 = false;		\
1607      static _GLIBCXX_USE_CONSTEXPR bool is_bounded = true;		\
1608      static _GLIBCXX_USE_CONSTEXPR bool is_modulo = true;		\
1609									\
1610      static _GLIBCXX_USE_CONSTEXPR bool traps = __glibcxx_integral_traps; \
1611      static _GLIBCXX_USE_CONSTEXPR bool tinyness_before = false;	\
1612      static _GLIBCXX_USE_CONSTEXPR float_round_style round_style	\
1613 = round_toward_zero; \
1614 };
1615
1616#if __cplusplus >= 201103L
1617
1618#define __INT_N_201103(TYPE) \
1619 static constexpr TYPE \
1620 lowest() noexcept { return min(); } \
1621 static constexpr int max_digits10 = 0;
1622
1623#define __INT_N_U201103(TYPE) \
1624 static constexpr unsigned TYPE \
1625 lowest() noexcept { return min(); } \
1626 static constexpr int max_digits10 = 0;
1627
1628#else
1629#define __INT_N_201103(TYPE)
1630#define __INT_N_U201103(TYPE)
1631#endif
1632
1633#if !defined(__STRICT_ANSI__)
1634#ifdef __GLIBCXX_TYPE_INT_N_0
1635 __INT_N(__GLIBCXX_TYPE_INT_N_0, __GLIBCXX_BITSIZE_INT_N_0,
1636 __INT_N_201103 (__GLIBCXX_TYPE_INT_N_0), __INT_N_U201103 (__GLIBCXX_TYPE_INT_N_0))
1637#endif
1638#ifdef __GLIBCXX_TYPE_INT_N_1
1639 __INT_N (__GLIBCXX_TYPE_INT_N_1, __GLIBCXX_BITSIZE_INT_N_1,
1640 __INT_N_201103 (__GLIBCXX_TYPE_INT_N_1), __INT_N_U201103 (__GLIBCXX_TYPE_INT_N_1))
1641#endif
1642#ifdef __GLIBCXX_TYPE_INT_N_2
1643 __INT_N (__GLIBCXX_TYPE_INT_N_2, __GLIBCXX_BITSIZE_INT_N_2,
1644 __INT_N_201103 (__GLIBCXX_TYPE_INT_N_2), __INT_N_U201103 (__GLIBCXX_TYPE_INT_N_2))
1645#endif
1646#ifdef __GLIBCXX_TYPE_INT_N_3
1647 __INT_N (__GLIBCXX_TYPE_INT_N_3, __GLIBCXX_BITSIZE_INT_N_3,
1648 __INT_N_201103 (__GLIBCXX_TYPE_INT_N_3), __INT_N_U201103 (__GLIBCXX_TYPE_INT_N_3))
1649#endif
1650
1651#elif defined __STRICT_ANSI__ && defined __SIZEOF_INT128__
1652 __INT_N(__int128, 128,
1653 __INT_N_201103 (__int128),
1654 __INT_N_U201103 (__int128))
1655#endif
1656
1657#undef __INT_N
1658#undef __INT_N_201103
1659#undef __INT_N_U201103
1660
1661
1662 /// numeric_limits<float> specialization.
1663 template<>
1664 struct numeric_limits<float>
1665 {
1666      static _GLIBCXX_USE_CONSTEXPR bool is_specialized = true;
1667
1668      static _GLIBCXX_CONSTEXPR float
1669      min() _GLIBCXX_USE_NOEXCEPT { return __FLT_MIN__; }
1670
1671      static _GLIBCXX_CONSTEXPR float
1672      max() _GLIBCXX_USE_NOEXCEPT { return __FLT_MAX__; }
1673
1674#if __cplusplus >= 201103L
1675      static constexpr float
1676      lowest() noexcept { return -__FLT_MAX__; }
1677#endif
1678
1679      static _GLIBCXX_USE_CONSTEXPR int digits = __FLT_MANT_DIG__;
1680      static _GLIBCXX_USE_CONSTEXPR int digits10 = __FLT_DIG__;
1681#if __cplusplus >= 201103L
1682      static constexpr int max_digits10
1683      = __glibcxx_max_digits10 (__FLT_MANT_DIG__);
1684#endif
1685      static _GLIBCXX_USE_CONSTEXPR bool is_signed = true;
1686      static _GLIBCXX_USE_CONSTEXPR bool is_integer = false;
1687      static _GLIBCXX_USE_CONSTEXPR bool is_exact = false;
1688      static _GLIBCXX_USE_CONSTEXPR int radix = __FLT_RADIX__;
1689
1690      static _GLIBCXX_CONSTEXPR float
1691      epsilon() _GLIBCXX_USE_NOEXCEPT { return __FLT_EPSILON__; }
1692
1693      static _GLIBCXX_CONSTEXPR float
1694      round_error() _GLIBCXX_USE_NOEXCEPT { return 0.5F; }
1695
1696      static _GLIBCXX_USE_CONSTEXPR int min_exponent = __FLT_MIN_EXP__;
1697      static _GLIBCXX_USE_CONSTEXPR int min_exponent10 = __FLT_MIN_10_EXP__;
1698      static _GLIBCXX_USE_CONSTEXPR int max_exponent = __FLT_MAX_EXP__;
1699      static _GLIBCXX_USE_CONSTEXPR int max_exponent10 = __FLT_MAX_10_EXP__;
1700
1701      static _GLIBCXX_USE_CONSTEXPR bool has_infinity = __FLT_HAS_INFINITY__;
1702      static _GLIBCXX_USE_CONSTEXPR bool has_quiet_NaN = __FLT_HAS_QUIET_NAN__;
1703      static _GLIBCXX_USE_CONSTEXPR bool has_signaling_NaN = has_quiet_NaN;
1704      static _GLIBCXX_USE_CONSTEXPR float_denorm_style has_denorm
1705      = bool(__FLT_HAS_DENORM__) ? denorm_present : denorm_absent;
1706      static _GLIBCXX_USE_CONSTEXPR bool has_denorm_loss
1707      = __glibcxx_float_has_denorm_loss;
1708
1709      static _GLIBCXX_CONSTEXPR float
1710      infinity() _GLIBCXX_USE_NOEXCEPT { return __builtin_huge_valf(); }
1711
1712      static _GLIBCXX_CONSTEXPR float
1713      quiet_NaN() _GLIBCXX_USE_NOEXCEPT { return __builtin_nanf(""); }
1714
1715      static _GLIBCXX_CONSTEXPR float
1716      signaling_NaN() _GLIBCXX_USE_NOEXCEPT { return __builtin_nansf(""); }
1717
1718      static _GLIBCXX_CONSTEXPR float
1719      denorm_min() _GLIBCXX_USE_NOEXCEPT { return __FLT_DENORM_MIN__; }
1720
1721      static _GLIBCXX_USE_CONSTEXPR bool is_iec559
1722      = has_infinity && has_quiet_NaN && has_denorm == denorm_present;
1723      static _GLIBCXX_USE_CONSTEXPR bool is_bounded = true;
1724      static _GLIBCXX_USE_CONSTEXPR bool is_modulo = false;
1725
1726      static _GLIBCXX_USE_CONSTEXPR bool traps = __glibcxx_float_traps;
1727      static _GLIBCXX_USE_CONSTEXPR bool tinyness_before
1728      = __glibcxx_float_tinyness_before;
1729      static _GLIBCXX_USE_CONSTEXPR float_round_style round_style
1730 = round_to_nearest;
1731 };
1732
1733#undef __glibcxx_float_has_denorm_loss
1734#undef __glibcxx_float_traps
1735#undef __glibcxx_float_tinyness_before
1736
1737 /// numeric_limits<double> specialization.
1738 template<>
1739 struct numeric_limits<double>
1740 {
1741      static _GLIBCXX_USE_CONSTEXPR bool is_specialized = true;
1742
1743      static _GLIBCXX_CONSTEXPR double
1744      min() _GLIBCXX_USE_NOEXCEPT { return __DBL_MIN__; }
1745
1746      static _GLIBCXX_CONSTEXPR double
1747      max() _GLIBCXX_USE_NOEXCEPT { return __DBL_MAX__; }
1748
1749#if __cplusplus >= 201103L
1750      static constexpr double
1751      lowest() noexcept { return -__DBL_MAX__; }
1752#endif
1753
1754      static _GLIBCXX_USE_CONSTEXPR int digits = __DBL_MANT_DIG__;
1755      static _GLIBCXX_USE_CONSTEXPR int digits10 = __DBL_DIG__;
1756#if __cplusplus >= 201103L
1757      static constexpr int max_digits10
1758      = __glibcxx_max_digits10 (__DBL_MANT_DIG__);
1759#endif
1760      static _GLIBCXX_USE_CONSTEXPR bool is_signed = true;
1761      static _GLIBCXX_USE_CONSTEXPR bool is_integer = false;
1762      static _GLIBCXX_USE_CONSTEXPR bool is_exact = false;
1763      static _GLIBCXX_USE_CONSTEXPR int radix = __FLT_RADIX__;
1764
1765      static _GLIBCXX_CONSTEXPR double
1766      epsilon() _GLIBCXX_USE_NOEXCEPT { return __DBL_EPSILON__; }
1767
1768      static _GLIBCXX_CONSTEXPR double
1769      round_error() _GLIBCXX_USE_NOEXCEPT { return 0.5; }
1770
1771      static _GLIBCXX_USE_CONSTEXPR int min_exponent = __DBL_MIN_EXP__;
1772      static _GLIBCXX_USE_CONSTEXPR int min_exponent10 = __DBL_MIN_10_EXP__;
1773      static _GLIBCXX_USE_CONSTEXPR int max_exponent = __DBL_MAX_EXP__;
1774      static _GLIBCXX_USE_CONSTEXPR int max_exponent10 = __DBL_MAX_10_EXP__;
1775
1776      static _GLIBCXX_USE_CONSTEXPR bool has_infinity = __DBL_HAS_INFINITY__;
1777      static _GLIBCXX_USE_CONSTEXPR bool has_quiet_NaN = __DBL_HAS_QUIET_NAN__;
1778      static _GLIBCXX_USE_CONSTEXPR bool has_signaling_NaN = has_quiet_NaN;
1779      static _GLIBCXX_USE_CONSTEXPR float_denorm_style has_denorm
1780      = bool(__DBL_HAS_DENORM__) ? denorm_present : denorm_absent;
1781      static _GLIBCXX_USE_CONSTEXPR bool has_denorm_loss
1782      = __glibcxx_double_has_denorm_loss;
1783
1784      static _GLIBCXX_CONSTEXPR double
1785      infinity() _GLIBCXX_USE_NOEXCEPT { return __builtin_huge_val(); }
1786
1787      static _GLIBCXX_CONSTEXPR double
1788      quiet_NaN() _GLIBCXX_USE_NOEXCEPT { return __builtin_nan(""); }
1789
1790      static _GLIBCXX_CONSTEXPR double
1791      signaling_NaN() _GLIBCXX_USE_NOEXCEPT { return __builtin_nans(""); }
1792
1793      static _GLIBCXX_CONSTEXPR double
1794      denorm_min() _GLIBCXX_USE_NOEXCEPT { return __DBL_DENORM_MIN__; }
1795
1796      static _GLIBCXX_USE_CONSTEXPR bool is_iec559
1797      = has_infinity && has_quiet_NaN && has_denorm == denorm_present;
1798      static _GLIBCXX_USE_CONSTEXPR bool is_bounded = true;
1799      static _GLIBCXX_USE_CONSTEXPR bool is_modulo = false;
1800
1801      static _GLIBCXX_USE_CONSTEXPR bool traps = __glibcxx_double_traps;
1802      static _GLIBCXX_USE_CONSTEXPR bool tinyness_before
1803      = __glibcxx_double_tinyness_before;
1804      static _GLIBCXX_USE_CONSTEXPR float_round_style round_style
1805 = round_to_nearest;
1806 };
1807
1808#undef __glibcxx_double_has_denorm_loss
1809#undef __glibcxx_double_traps
1810#undef __glibcxx_double_tinyness_before
1811
1812 /// numeric_limits<long double> specialization.
1813 template<>
1814 struct numeric_limits<long double>
1815 {
1816      static _GLIBCXX_USE_CONSTEXPR bool is_specialized = true;
1817
1818      static _GLIBCXX_CONSTEXPR long double
1819      min() _GLIBCXX_USE_NOEXCEPT { return __LDBL_MIN__; }
1820
1821      static _GLIBCXX_CONSTEXPR long double
1822      max() _GLIBCXX_USE_NOEXCEPT { return __LDBL_MAX__; }
1823
1824#if __cplusplus >= 201103L
1825      static constexpr long double
1826      lowest() noexcept { return -__LDBL_MAX__; }
1827#endif
1828
1829      static _GLIBCXX_USE_CONSTEXPR int digits = __LDBL_MANT_DIG__;
1830      static _GLIBCXX_USE_CONSTEXPR int digits10 = __LDBL_DIG__;
1831#if __cplusplus >= 201103L
1832      static _GLIBCXX_USE_CONSTEXPR int max_digits10
1833      = __glibcxx_max_digits10 (__LDBL_MANT_DIG__);
1834#endif
1835      static _GLIBCXX_USE_CONSTEXPR bool is_signed = true;
1836      static _GLIBCXX_USE_CONSTEXPR bool is_integer = false;
1837      static _GLIBCXX_USE_CONSTEXPR bool is_exact = false;
1838      static _GLIBCXX_USE_CONSTEXPR int radix = __FLT_RADIX__;
1839
1840      static _GLIBCXX_CONSTEXPR long double
1841      epsilon() _GLIBCXX_USE_NOEXCEPT { return __LDBL_EPSILON__; }
1842
1843      static _GLIBCXX_CONSTEXPR long double
1844      round_error() _GLIBCXX_USE_NOEXCEPT { return 0.5L; }
1845
1846      static _GLIBCXX_USE_CONSTEXPR int min_exponent = __LDBL_MIN_EXP__;
1847      static _GLIBCXX_USE_CONSTEXPR int min_exponent10 = __LDBL_MIN_10_EXP__;
1848      static _GLIBCXX_USE_CONSTEXPR int max_exponent = __LDBL_MAX_EXP__;
1849      static _GLIBCXX_USE_CONSTEXPR int max_exponent10 = __LDBL_MAX_10_EXP__;
1850
1851      static _GLIBCXX_USE_CONSTEXPR bool has_infinity = __LDBL_HAS_INFINITY__;
1852      static _GLIBCXX_USE_CONSTEXPR bool has_quiet_NaN = __LDBL_HAS_QUIET_NAN__;
1853      static _GLIBCXX_USE_CONSTEXPR bool has_signaling_NaN = has_quiet_NaN;
1854      static _GLIBCXX_USE_CONSTEXPR float_denorm_style has_denorm
1855      = bool(__LDBL_HAS_DENORM__) ? denorm_present : denorm_absent;
1856      static _GLIBCXX_USE_CONSTEXPR bool has_denorm_loss
1857      = __glibcxx_long_double_has_denorm_loss;
1858
1859      static _GLIBCXX_CONSTEXPR long double
1860      infinity() _GLIBCXX_USE_NOEXCEPT { return __builtin_huge_vall(); }
1861
1862      static _GLIBCXX_CONSTEXPR long double
1863      quiet_NaN() _GLIBCXX_USE_NOEXCEPT { return __builtin_nanl(""); }
1864
1865      static _GLIBCXX_CONSTEXPR long double
1866      signaling_NaN() _GLIBCXX_USE_NOEXCEPT { return __builtin_nansl(""); }
1867
1868      static _GLIBCXX_CONSTEXPR long double
1869      denorm_min() _GLIBCXX_USE_NOEXCEPT { return __LDBL_DENORM_MIN__; }
1870
1871      static _GLIBCXX_USE_CONSTEXPR bool is_iec559
1872      = has_infinity && has_quiet_NaN && has_denorm == denorm_present;
1873      static _GLIBCXX_USE_CONSTEXPR bool is_bounded = true;
1874      static _GLIBCXX_USE_CONSTEXPR bool is_modulo = false;
1875
1876      static _GLIBCXX_USE_CONSTEXPR bool traps = __glibcxx_long_double_traps;
1877      static _GLIBCXX_USE_CONSTEXPR bool tinyness_before =
1878      __glibcxx_long_double_tinyness_before;
1879      static _GLIBCXX_USE_CONSTEXPR float_round_style round_style =
1880 round_to_nearest;
1881 };
1882
1883#undef __glibcxx_long_double_has_denorm_loss
1884#undef __glibcxx_long_double_traps
1885#undef __glibcxx_long_double_tinyness_before
1886
1887_GLIBCXX_END_NAMESPACE_VERSION
1888} // namespace
1889
1890#undef __glibcxx_signed
1891#undef __glibcxx_min
1892#undef __glibcxx_max
1893#undef __glibcxx_digits
1894#undef __glibcxx_digits10
1895#undef __glibcxx_max_digits10
1896
1897#endif // _GLIBCXX_NUMERIC_LIMITS